]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
clang-format: split content of a list to multiple lines
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
40e23961 26#include "tree.h"
c7131fb2 27#include "gimple.h"
957060b5
AM
28#include "cfghooks.h"
29#include "tree-pass.h"
c7131fb2 30#include "ssa.h"
957060b5
AM
31#include "cgraph.h"
32#include "gimple-pretty-print.h"
c7131fb2 33#include "fold-const.h"
45b0be94 34#include "gimplify.h"
5be5c238 35#include "gimple-iterator.h"
18f429e2 36#include "gimplify-me.h"
5be5c238 37#include "gimple-walk.h"
d8a2d370
DN
38#include "stor-layout.h"
39#include "tree-nested.h"
442b4905 40#include "tree-cfg.h"
e28030cf
AM
41#include "tree-ssa-loop-ivopts.h"
42#include "tree-ssa-loop-manip.h"
43#include "tree-ssa-loop-niter.h"
442b4905
AM
44#include "tree-ssa-loop.h"
45#include "tree-into-ssa.h"
5f40b3cb 46#include "cfgloop.h"
1bd6497c 47#include "tree-scalar-evolution.h"
5f40b3cb 48#include "langhooks.h"
a509ebb5 49#include "tree-vectorizer.h"
4a8fb1a1 50#include "tree-hasher.h"
c1bf2a39 51#include "tree-parloops.h"
0645c1a2 52#include "omp-low.h"
7c82d827 53#include "tree-ssa.h"
f7f18684 54#include "params.h"
1f600fea 55#include "params-enum.h"
5f40b3cb
ZD
56
57/* This pass tries to distribute iterations of loops into several threads.
58 The implementation is straightforward -- for each loop we test whether its
59 iterations are independent, and if it is the case (and some additional
60 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
61 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
62 machinery do its job.
b8698a0f 63
5f40b3cb
ZD
64 The most of the complexity is in bringing the code into shape expected
65 by the omp expanders:
726a989a
RB
66 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
67 variable and that the exit test is at the start of the loop body
68 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
69 variables by accesses through pointers, and breaking up ssa chains
70 by storing the values incoming to the parallelized loop to a structure
71 passed to the new function as an argument (something similar is done
72 in omp gimplification, unfortunately only a small part of the code
73 can be shared).
74
75 TODO:
76 -- if there are several parallelizable loops in a function, it may be
77 possible to generate the threads just once (using synchronization to
78 ensure that cross-loop dependences are obeyed).
70837b71
RL
79 -- handling of common reduction patterns for outer loops.
80
81 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 82/*
a509ebb5 83 Reduction handling:
8a9ecffd 84 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 85 The code transformation will be introduced by an example.
b8698a0f
L
86
87
a509ebb5
RL
88parloop
89{
90 int sum=1;
91
0eb7e7aa 92 for (i = 0; i < N; i++)
a509ebb5
RL
93 {
94 x[i] = i + 3;
95 sum+=x[i];
96 }
97}
98
0eb7e7aa 99gimple-like code:
a509ebb5
RL
100header_bb:
101
0eb7e7aa
RL
102 # sum_29 = PHI <sum_11(5), 1(3)>
103 # i_28 = PHI <i_12(5), 0(3)>
104 D.1795_8 = i_28 + 3;
105 x[i_28] = D.1795_8;
106 sum_11 = D.1795_8 + sum_29;
107 i_12 = i_28 + 1;
108 if (N_6(D) > i_12)
109 goto header_bb;
110
a509ebb5
RL
111
112exit_bb:
113
0eb7e7aa
RL
114 # sum_21 = PHI <sum_11(4)>
115 printf (&"%d"[0], sum_21);
a509ebb5
RL
116
117
118after reduction transformation (only relevant parts):
119
120parloop
121{
122
123....
124
0eb7e7aa 125
fa10beec 126 # Storing the initial value given by the user. #
0eb7e7aa 127
ae0bce62 128 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
129
130 #pragma omp parallel num_threads(4)
a509ebb5 131
0eb7e7aa 132 #pragma omp for schedule(static)
ae0bce62
RL
133
134 # The neutral element corresponding to the particular
135 reduction's operation, e.g. 0 for PLUS_EXPR,
136 1 for MULT_EXPR, etc. replaces the user's initial value. #
137
138 # sum.27_29 = PHI <sum.27_11, 0>
139
0eb7e7aa 140 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 141
726a989a 142 GIMPLE_OMP_CONTINUE
a509ebb5 143
0eb7e7aa
RL
144 # Adding this reduction phi is done at create_phi_for_local_result() #
145 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 146 GIMPLE_OMP_RETURN
b8698a0f
L
147
148 # Creating the atomic operation is done at
0eb7e7aa 149 create_call_for_reduction_1() #
a509ebb5 150
0eb7e7aa
RL
151 #pragma omp atomic_load
152 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
153 D.1840_60 = sum.27_56 + D.1839_59;
154 #pragma omp atomic_store (D.1840_60);
b8698a0f 155
726a989a 156 GIMPLE_OMP_RETURN
b8698a0f 157
0eb7e7aa
RL
158 # collecting the result after the join of the threads is done at
159 create_loads_for_reductions().
ae0bce62
RL
160 The value computed by the threads is loaded from the
161 shared struct. #
162
b8698a0f 163
0eb7e7aa 164 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 165 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
166 sum_43 = D.1795_41 + sum_37;
167
168 exit bb:
169 # sum_21 = PHI <sum_43, sum_26>
170 printf (&"%d"[0], sum_21);
171
172...
173
a509ebb5
RL
174}
175
176*/
177
5f40b3cb
ZD
178/* Minimal number of iterations of a loop that should be executed in each
179 thread. */
180#define MIN_PER_THREAD 100
181
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;		/* reduction statement.  */
  gimple *reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value.  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
201
/* Reduction info hashtable helpers.  Entries hash on REDUC_VERSION (the
   SSA name version of the original reduction phi result) and compare
   equal when they refer to the same reduction phi.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};
209
210/* Equality and hash functions for hashtab code. */
a509ebb5 211
4a8fb1a1 212inline bool
67f58944 213reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
4a8fb1a1 214{
a509ebb5
RL
215 return (a->reduc_phi == b->reduc_phi);
216}
217
4a8fb1a1 218inline hashval_t
67f58944 219reduction_hasher::hash (const reduction_info *a)
a509ebb5 220{
5d1fd1de 221 return a->reduc_version;
a509ebb5
RL
222}
223
c203e8a7 224typedef hash_table<reduction_hasher> reduction_info_table_type;
4a8fb1a1
LC
225
226
/* Look up PHI in REDUCTION_LIST and return the reduction_info entry
   describing it, or NULL if PHI is not a known reduction phi.  The
   lookup key is gimple_uid (PHI), which this pass uses to hold the
   SSA name version of the original phi result (see reduc_version).  */

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  /* uids of 0 and (unsigned)-1 are treated as "not a reduction phi"
     sentinels here; don't consult the table for those.  */
  if (gimple_uid (phi) == (unsigned int)-1
      || gimple_uid (phi) == 0)
    return NULL;

  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);
  /* Hashing is on the uid but equality compares the phi itself, so a
     hit must refer to exactly this phi.  */
  gcc_assert (red == NULL || red->reduc_phi == phi);

  return red;
}
246
5f40b3cb
ZD
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
256
/* Name copies hashtable helpers.  Entries hash and compare on the SSA
   name version stored in the element.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};
264
265/* Equality and hash functions for hashtab code. */
5f40b3cb 266
4a8fb1a1 267inline bool
67f58944 268name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
4a8fb1a1 269{
5f40b3cb
ZD
270 return a->version == b->version;
271}
272
4a8fb1a1 273inline hashval_t
67f58944 274name_to_copy_hasher::hash (const name_to_copy_elt *a)
5f40b3cb 275{
5f40b3cb
ZD
276 return (hashval_t) a->version;
277}
278
c203e8a7 279typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
4a8fb1a1 280
b305e3da
SP
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;		/* The integer coefficients.  */
  int rowsize;			/* Number of rows.  */
  int colsize;			/* Number of columns.  */
  int denominator;		/* Shared denominator for all elements.  */
} *lambda_trans_matrix;

/* Accessors for the fields of a lambda_trans_matrix.  */
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
295
296/* Allocate a new transformation matrix. */
297
298static lambda_trans_matrix
299lambda_trans_matrix_new (int colsize, int rowsize,
300 struct obstack * lambda_obstack)
301{
302 lambda_trans_matrix ret;
303
304 ret = (lambda_trans_matrix)
305 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
306 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
307 LTM_ROWSIZE (ret) = rowsize;
308 LTM_COLSIZE (ret) = colsize;
309 LTM_DENOMINATOR (ret) = 1;
310 return ret;
311}
312
313/* Multiply a vector VEC by a matrix MAT.
314 MAT is an M*N matrix, and VEC is a vector with length N. The result
315 is stored in DEST which must be a vector of length M. */
316
317static void
318lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
319 lambda_vector vec, lambda_vector dest)
320{
321 int i, j;
322
323 lambda_vector_clear (dest, m);
324 for (i = 0; i < m; i++)
325 for (j = 0; j < n; j++)
326 dest[i] += matrix[i][j] * vec[j];
327}
328
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is no worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. input-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Check that the transformed distance vector trans.dist_vect stays
	 lexicographically positive for every distance vector of DDR.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
08dab97a
RL
400
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent on each other (that is, if we can execute them
   in parallel).  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  /* Build the 1x1 transformation matrix [-1], i.e. the reversal of the
     single loop: if reversal is legal w.r.t. all dependences, the
     iterations are independent.  */
  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  /* Release the dependence info on every path, including analysis
     failure.  */
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
457
1d4af1e8
SP
458/* Return true when LOOP contains basic blocks marked with the
459 BB_IRREDUCIBLE_LOOP flag. */
460
461static inline bool
462loop_has_blocks_with_irreducible_flag (struct loop *loop)
463{
464 unsigned i;
465 basic_block *bbs = get_loop_body_in_dom_order (loop);
466 bool res = true;
467
468 for (i = 0; i < loop->num_nodes; i++)
469 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
470 goto end;
471
472 res = false;
473 end:
474 free (bbs);
475 return res;
476}
477
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.

   A NULL GSI means no new statements may be emitted; in that case NULL
   is returned unless the needed address was already cached (used for
   debug binds -- the caller resets the bind instead).  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Walk down to the base object below any component references.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      /* Not cached yet; without a GSI we may not create the
	 initializing statement.  */
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      /* Reuse the decl's name for the temporary when available, for
	 readable dumps.  */
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Convert to the requested pointer type if needed.  */
  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
552
12efb1d7 553static tree
355fe088 554reduc_stmt_res (gimple *stmt)
12efb1d7
TV
555{
556 return (gimple_code (stmt) == GIMPLE_PHI
557 ? gimple_phi_result (stmt)
558 : gimple_assign_lhs (stmt));
559}
560
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
				reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     The old value is saved in reduc->initial_value below, to be
     taken into consideration after the parallel computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  /* Nonzero return keeps htab_traverse going.  */
  return 1;
}
5f40b3cb
ZD
603
/* Data passed through walk_tree / walk_gimple_op while eliminating
   references to local variables (see eliminate_local_variables_1).  */

struct elv_data
{
  struct walk_stmt_info info;	/* Must be first: callbacks receive the
				   address of this member and cast back
				   to elv_data.  */
  edge entry;			/* Entry edge of the SESE region.  */
  int_tree_htab_type *decl_address;  /* Cache of already-taken addresses.  */
  gimple_stmt_iterator *gsi;	/* Insertion point for new statements;
				   NULL when processing debug binds.  */
  bool changed;			/* Set when the walked expression changed.  */
  bool reset;			/* Set when a debug bind value could not be
				   rewritten and must be reset.  */
};
613
9f9f72aa
AP
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      /* Replace the direct decl reference with an indirect reference
	 through its (region-invariant) address.  */
      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* Debug bind whose value we cannot rewrite; ask the caller to
	     reset it.  */
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Only expressions can contain the references we rewrite; don't walk
     into anything else.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
690
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      /* For debug binds we may not emit new statements (NULL gsi); if
	 the value cannot be rewritten, reset the bind instead.  */
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      /* Clobbers of locals make no sense once the body is outlined;
	 drop them.  */
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
735
9f9f72aa
AP
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rewrite all non-debug statements, recording whether
     any debug binds exist.  Debug binds are handled in a second pass so
     that they can reuse the addresses cached by the first pass (a debug
     bind itself may not emit new statements).  */
  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, &decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
779
780/* Returns true if expression EXPR is not defined between ENTRY and
781 EXIT, i.e. if all its operands are defined outside of the region. */
782
783static bool
784expr_invariant_in_region_p (edge entry, edge exit, tree expr)
785{
786 basic_block entry_bb = entry->src;
787 basic_block exit_bb = exit->dest;
788 basic_block def_bb;
9f9f72aa
AP
789
790 if (is_gimple_min_invariant (expr))
791 return true;
792
793 if (TREE_CODE (expr) == SSA_NAME)
794 {
726a989a 795 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
796 if (def_bb
797 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
798 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
799 return false;
800
801 return true;
802 }
803
726a989a 804 return false;
5f40b3cb
ZD
805}
806
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only insert a new slot when we are allowed to create a copy.  */
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  /* Anonymous SSA names (no underlying decl) need no decl copy.  */
  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again: map the copy's own uid to itself.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
880
9f9f72aa
AP
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  /* Defs are never copied (copy_name_p == false); only their base decl
     is remapped.  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  /* Uses defined outside the region are replaced by their copies.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
920
/* Debug-statement counterpart of separate_decls_in_region_stmt: remaps
   the variable and the ssa-name uses of the debug bind or source bind
   STMT to the duplicates recorded in NAME_COPIES and DECL_COPIES.
   Returns true if STMT should be removed (it binds a variable or uses
   a name for which no copy exists), false if it was updated in
   place.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  /* No decl copy was made for VAR, so the debug stmt cannot be
     remapped; tell the caller to drop it.  */
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      /* A use without a recorded copy: reset the bound value instead of
	 keeping a dangling reference, and keep the stmt.  */
      if (!slot)
	{
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
979
0eb7e7aa
RL
980/* Callback for htab_traverse. Adds a field corresponding to the reduction
981 specified in SLOT. The type is passed in DATA. */
982
4a8fb1a1
LC
983int
984add_field_for_reduction (reduction_info **slot, tree type)
a509ebb5 985{
b8698a0f 986
4a8fb1a1 987 struct reduction_info *const red = *slot;
12efb1d7 988 tree var = reduc_stmt_res (red->reduc_stmt);
aa06a978
RB
989 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
990 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
0eb7e7aa
RL
991
992 insert_field_into_struct (type, field);
993
994 red->field = field;
995
996 return 1;
997}
a509ebb5 998
5f40b3cb 999/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 1000 described in SLOT. The type is passed in DATA. */
5f40b3cb 1001
4a8fb1a1
LC
1002int
1003add_field_for_name (name_to_copy_elt **slot, tree type)
5f40b3cb 1004{
4a8fb1a1 1005 struct name_to_copy_elt *const elt = *slot;
5f40b3cb 1006 tree name = ssa_name (elt->version);
70b5e7dc
RG
1007 tree field = build_decl (UNKNOWN_LOCATION,
1008 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1009 TREE_TYPE (name));
5f40b3cb
ZD
1010
1011 insert_field_into_struct (type, field);
1012 elt->field = field;
a509ebb5 1013
5f40b3cb
ZD
1014 return 1;
1015}
1016
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  /* The phi's result is a fresh copy of the reduction statement's
     result, merging the initial value (edge E from before the loop)
     with the value computed in the loop (the fallthru edge).  */
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}
5f40b3cb
ZD
1059
/* Describes the shared data structure used to pass values in and out
   of the region that is split off into a separate thread function:
   which variable holds the stored values, which pointer they are
   re-loaded through, and in which blocks the copy code is emitted.  */

struct clsn_data
{
  /* Variable the values are stored to (".paral_data_store").  */
  tree store;
  /* Pointer (ssa name) the values are loaded back through.  */
  tree load;

  /* Block in which the stores to STORE are generated.  */
  basic_block store_bb;
  /* Block in which the loads through LOAD are generated.  */
  basic_block load_bb;
};
1068
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  /* Address of this reduction's field in the shared structure; this is
     the memory location the atomic update operates on.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  /* Atomic load of the current field value into TMP_LOAD; the load and
     the store below must each start their own block.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's local result using the
     reduction operation.  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
1121
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First give each reduction a phi merging the thread-local partial
     result with the initial value.  */
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  /* Then emit an atomic update of the shared field per reduction.  */
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
1138
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  /* Load the accumulated value from this reduction's field in the
     shared structure.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  /* Assign it directly to the result of the kept exit phi, which the
     phi node itself is then replaced by.  */
  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The exit phi must exist; remove it now that the assignment above
     defines its result.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
1177
b8698a0f 1178/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1179 REDUCTION_LIST describes the list of reductions that the
fa10beec 1180 loads should be generated for. */
a509ebb5 1181static void
c203e8a7 1182create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
a509ebb5
RL
1183 struct clsn_data *ld_st_data)
1184{
726a989a 1185 gimple_stmt_iterator gsi;
a509ebb5 1186 tree t;
355fe088 1187 gimple *stmt;
a509ebb5 1188
726a989a 1189 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1190 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1191 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1192
726a989a 1193 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1194
4a8fb1a1 1195 reduction_list
c203e8a7 1196 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
a509ebb5
RL
1197
1198}
1199
0eb7e7aa
RL
1200/* Callback for htab_traverse. Store the neutral value for the
1201 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1202 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1203 The reduction is specified in SLOT. The store information is
1204 passed in DATA. */
0eb7e7aa 1205
4a8fb1a1
LC
1206int
1207create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
0eb7e7aa 1208{
4a8fb1a1 1209 struct reduction_info *const red = *slot;
726a989a 1210 tree t;
355fe088 1211 gimple *stmt;
726a989a 1212 gimple_stmt_iterator gsi;
12efb1d7 1213 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
726a989a
RB
1214
1215 gsi = gsi_last_bb (clsn_data->store_bb);
1216 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1217 stmt = gimple_build_assign (t, red->initial_value);
726a989a 1218 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1219
1220 return 1;
1221}
1222
a509ebb5
RL
1223/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1224 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1225 specified in SLOT. */
1226
4a8fb1a1
LC
1227int
1228create_loads_and_stores_for_name (name_to_copy_elt **slot,
1229 struct clsn_data *clsn_data)
5f40b3cb 1230{
4a8fb1a1 1231 struct name_to_copy_elt *const elt = *slot;
726a989a 1232 tree t;
355fe088 1233 gimple *stmt;
726a989a 1234 gimple_stmt_iterator gsi;
5f40b3cb 1235 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1236 tree load_struct;
1237
726a989a
RB
1238 gsi = gsi_last_bb (clsn_data->store_bb);
1239 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1240 stmt = gimple_build_assign (t, ssa_name (elt->version));
726a989a 1241 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1242
726a989a 1243 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1244 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1245 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1246 stmt = gimple_build_assign (elt->new_name, t);
726a989a 1247 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1248
1249 return 1;
1250}
1251
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* Pass 1: remap names/decls in all non-debug stmts of the region.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      /* No copy available: the debug stmt is dropped.  */
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
1411
a79b7ec5 1412/* Returns true if FN was created to run in parallel. */
5f40b3cb 1413
62e0a1ed 1414bool
a79b7ec5 1415parallelized_function_p (tree fndecl)
5f40b3cb 1416{
a79b7ec5
TV
1417 cgraph_node *node = cgraph_node::get (fndecl);
1418 gcc_assert (node != NULL);
1419 return node->parallelized_function;
5f40b3cb
ZD
1420}
1421
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  /* Build a unique private name of the form <caller>.$loopfn.<N>.  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* The outlined function takes a single pointer argument (the shared
     data structure) and returns void.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Void result.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single parameter carrying the shared data pointer.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1473
7c82d827
TV
1474/* Replace uses of NAME by VAL in block BB. */
1475
1476static void
1477replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1478{
355fe088 1479 gimple *use_stmt;
7c82d827
TV
1480 imm_use_iterator imm_iter;
1481
1482 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1483 {
1484 if (gimple_bb (use_stmt) != bb)
1485 continue;
1486
1487 use_operand_p use_p;
1488 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1489 SET_USE (use_p, val);
1490 }
1491}
1492
7c82d827
TV
1493/* Do transformation from:
1494
1495 <bb preheader>:
1496 ...
1497 goto <bb header>
1498
1499 <bb header>:
1500 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1501 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1502 ...
1503 use (ivtmp_a)
1504 ...
1505 sum_b = sum_a + sum_update
1506 ...
1507 if (ivtmp_a < n)
1508 goto <bb latch>;
1509 else
1510 goto <bb exit>;
1511
1512 <bb latch>:
1513 ivtmp_b = ivtmp_a + 1;
1514 goto <bb header>
1515
1516 <bb exit>:
712cb0bb 1517 sum_z = PHI <sum_b (cond[1]), ...>
7c82d827
TV
1518
1519 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1520 that's <bb header>.
1521
1522 to:
1523
1524 <bb preheader>:
1525 ...
1526 goto <bb newheader>
1527
1528 <bb header>:
1529 ivtmp_a = PHI <ivtmp_c (latch)>
1530 sum_a = PHI <sum_c (latch)>
1531 ...
1532 use (ivtmp_a)
1533 ...
1534 sum_b = sum_a + sum_update
1535 ...
1536 goto <bb latch>;
1537
1538 <bb newheader>:
1539 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1540 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1541 if (ivtmp_c < n + 1)
1542 goto <bb header>;
1543 else
712cb0bb 1544 goto <bb newexit>;
7c82d827
TV
1545
1546 <bb latch>:
1547 ivtmp_b = ivtmp_a + 1;
1548 goto <bb newheader>
1549
712cb0bb
TV
1550 <bb newexit>:
1551 sum_y = PHI <sum_c (newheader)>
1552
7c82d827 1553 <bb exit>:
712cb0bb 1554 sum_z = PHI <sum_y (newexit), ...>
7c82d827
TV
1555
1556
1557 In unified diff format:
1558
1559 <bb preheader>:
1560 ...
1561- goto <bb header>
1562+ goto <bb newheader>
1563
1564 <bb header>:
1565- ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1566- sum_a = PHI <sum_init (preheader), sum_b (latch)>
1567+ ivtmp_a = PHI <ivtmp_c (latch)>
1568+ sum_a = PHI <sum_c (latch)>
1569 ...
1570 use (ivtmp_a)
1571 ...
1572 sum_b = sum_a + sum_update
1573 ...
1574- if (ivtmp_a < n)
1575- goto <bb latch>;
1576+ goto <bb latch>;
1577+
1578+ <bb newheader>:
1579+ ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1580+ sum_c = PHI <sum_init (preheader), sum_b (latch)>
1581+ if (ivtmp_c < n + 1)
1582+ goto <bb header>;
1583 else
1584 goto <bb exit>;
1585
1586 <bb latch>:
1587 ivtmp_b = ivtmp_a + 1;
1588- goto <bb header>
1589+ goto <bb newheader>
1590
712cb0bb
TV
1591+ <bb newexit>:
1592+ sum_y = PHI <sum_c (newheader)>
1593
7c82d827 1594 <bb exit>:
712cb0bb
TV
1595- sum_z = PHI <sum_b (cond[1]), ...>
1596+ sum_z = PHI <sum_y (newexit), ...>
7c82d827
TV
1597
1598 Note: the example does not show any virtual phis, but these are handled more
1599 or less as reductions.
b8698a0f 1600
7c82d827
TV
1601
1602 Moves the exit condition of LOOP to the beginning of its header.
1603 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1604 bound. */
1605
static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal from after the transformation, which is required by
     create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  The old header phis are duplicated in new_header;
     the pending edge-var maps created by the redirections above are
     walked in lockstep with the header phis.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      /* Every header phi must be a virtual, the control iv, or a
	 registered reduction.  */
      struct reduction_info *red = reduction_phi (reduction_list, phi);
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);


  basic_block new_exit_block = NULL;
  if (!single_pred_p (exit->dest))
    {
      /* Create a new empty exit block, inbetween the new loop header and the
	 old exit block.  The function separate_decls_in_region needs this block
	 to insert code that is active on loop exit, but not any other path.  */
      new_exit_block = split_edge (exit);
    }

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      gphi *nphi = NULL;
      tree res_z = PHI_RESULT (phi);
      tree res_c;

      if (new_exit_block != NULL)
	{
	  /* Now that we have a new exit block, duplicate the phi of the old
	     exit block in the new exit block to preserve loop-closed ssa.  */
	  edge succ_new_exit_block = single_succ_edge (new_exit_block);
	  edge pred_new_exit_block = single_pred_edge (new_exit_block);
	  tree res_y = copy_ssa_name (res_z, phi);
	  nphi = create_phi_node (res_y, new_exit_block);
	  res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
	  add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
	  add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
	}
      else
	res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (virtual_operand_p (res_z))
	continue;

      /* Remember which phi carries the final reduction value out of
	 the loop.  */
      gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = (nphi != NULL
			 ? nphi
			 : phi);
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  checking_verify_ssa (true, true);
}
1757
/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of the
   loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
   transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  The transformed loop tests
     the iv against nit + 1 instead of nit, so the new bound must be
     representable in the iv's type.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
	{
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
  if (!wi::lts_p (nit_max, type_max))
    return false;

  gimple *def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
1873
/* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
   number of iterations of the loop.  REDUCTION_LIST describes the reductions in
   LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  /* Split the header after its labels/phis; ORIG_HEADER receives the
     original header statements, while LOOP->header keeps only the phis.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The exit test compares CONTROL against NIT (asserted below).  */
  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  For each header phi, give it a
     fresh result T and re-create the old result RES as a phi in
     ORIG_HEADER fed by T.  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition testing the renamed control iv.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* N = number of blocks up to (excluding) the exit test's block, in
     dominator order.  */
  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  /* Duplicate the tail of the loop (everything after the exit test) so the
     part of the last iteration that ran before the test is preserved;
     NBBS[0] becomes the new exit block.  */
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else copied out must be the control variable; its phi in
	 EX_BB is removed and replaced by an explicit assignment below.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				  fold_convert (TREE_TYPE (control_name), nit_1),
				  false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
1985
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  */

static void
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
  tree t, param;
  gomp_parallel *omp_par_stmt;
  gimple *omp_return_stmt1, *omp_return_stmt2;
  gimple *phi;
  gcond *cond_stmt;
  gomp_for *for_stmt;
  gomp_continue *omp_cont_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement, placed just before the
     loop's preheader with a num_threads clause of N_THREADS.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (omp_par_stmt, loc);

  gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA: the outlined body receives the address of DATA
     through LOOP_FN's (sole) argument.  */
  if (data)
    {
      gassign *assign_stmt;

      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
      assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

      assign_stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  omp_return_stmt1 = gimple_build_omp_return (false);
  gimple_set_location (omp_return_stmt1, loc);
  gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR: the control variable CVAR, its
     initial value, and its next-iteration value, from the exit condition
     at the loop header.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = as_a <gcond *> (last_stmt (loop->header));

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* Remove the increment of CVAR from the latch; GIMPLE_OMP_CONTINUE
     (built below) becomes the definition of CVAR_NEXT instead.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  /* GUARD bypasses the loop body from FOR_BB straight to EX_BB; phis on
     EX_BB receive the preheader values along it (see the loop below).  */
  guard = make_edge (for_bb, ex_bb, 0);
  /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
  loop->latch = split_edge (single_succ_edge (loop->latch));
  single_pred_edge (loop->latch)->flags = 0;
  end = make_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
  rescan_loop_exit (end, true, false);

  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      source_location locus;
      gphi *phi = gpi.phi ();
      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      gimple *def_stmt = SSA_NAME_DEF_STMT (def);

      /* If the exit phi is not connected to a header phi in the same loop, this
	 value is not modified in the loop, and we're done with this phi.  */
      if (!(gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == loop->header))
	continue;

      gphi *stmt = as_a <gphi *> (def_stmt);
      /* Along GUARD the value never went through the loop, so use the
	 preheader value; along END use the latch value.  */
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  The schedule kind and chunk size come from the
     parloops-schedule/parloops-chunk-size params; auto and runtime take
     no chunk.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
  enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type \
    = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
  switch (schedule_type)
    {
    case PARAM_PARLOOPS_SCHEDULE_KIND_static:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
      chunk_size = 0;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
      chunk_size = 0;
      break;
    default:
      gcc_unreachable ();
    }
  if (chunk_size != 0)
    OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
      = build_int_cst (integer_type_node, chunk_size);

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE; it now defines CVAR_NEXT (replacing the
     increment removed from the latch above).  */
  continue_bb = single_pred (loop->latch);
  gsi = gsi_last_bb (continue_bb);
  omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (omp_cont_stmt, loc);
  gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  omp_return_stmt2 = gimple_build_omp_return (true);
  gimple_set_location (omp_return_stmt2, loc);
  gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);

  /* After the above edge surgery the dominator info is stale; recompute it
     from scratch.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}
2165
08dab97a
RL
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple *cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  /* Gimplify the iteration count; any statements needed to compute it go
     on the preheader edge.  */
  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* For outer loops use a lower per-thread iteration threshold, since NIT
     only counts outer iterations (see the FIXME above).  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

  /* Only parallelize when the loop is known to iterate, and to iterate at
     least m_p_thread * n_threads times.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.
     The common case is that latch of the loop is empty (apart from the
     increment) and immediately follows the loop exit test.  Attempt to move the
     entry of the loop directly before the exit check and increase the number of
     iterations of the loop by one.  */
  if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
    {
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "alternative exit-first loop transform succeeded"
		 " for loop %d\n", loop->num);
    }
  else
    {
      /* Fall back on the method that handles more cases, but duplicates the
	 loop body: move the exit condition of LOOP to the beginning of its
	 header, and duplicate the part of the last iteration that gets disabled
	 to the exit of the loop.  */
      transform_to_exit_first_loop (loop, reduction_list, nit);
    }

  /* Generate initializations for reductions.  */
  if (reduction_list->elements () > 0)
    reduction_list->traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  Use the location of the header's exit
     condition for the emitted OMP statements, if available.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
			new_arg_struct, n_threads, loc);
  if (reduction_list->elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);
}
2348
9857228c
SP
2349/* Returns true when LOOP contains vector phi nodes. */
2350
2351static bool
726a989a 2352loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
2353{
2354 unsigned i;
2355 basic_block *bbs = get_loop_body_in_dom_order (loop);
538dd0b7 2356 gphi_iterator gsi;
9857228c 2357 bool res = true;
9857228c
SP
2358
2359 for (i = 0; i < loop->num_nodes; i++)
726a989a 2360 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
538dd0b7 2361 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
9857228c
SP
2362 goto end;
2363
2364 res = false;
2365 end:
2366 free (bbs);
2367 return res;
2368}
2369
08dab97a
RL
2370/* Create a reduction_info struct, initialize it with REDUC_STMT
2371 and PHI, insert it to the REDUCTION_LIST. */
2372
2373static void
c203e8a7 2374build_new_reduction (reduction_info_table_type *reduction_list,
355fe088 2375 gimple *reduc_stmt, gphi *phi)
08dab97a 2376{
4a8fb1a1 2377 reduction_info **slot;
08dab97a 2378 struct reduction_info *new_reduction;
12efb1d7 2379 enum tree_code reduction_code;
08dab97a
RL
2380
2381 gcc_assert (reduc_stmt);
b8698a0f 2382
08dab97a
RL
2383 if (dump_file && (dump_flags & TDF_DETAILS))
2384 {
2385 fprintf (dump_file,
2386 "Detected reduction. reduction stmt is: \n");
2387 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2388 fprintf (dump_file, "\n");
2389 }
b8698a0f 2390
12efb1d7
TV
2391 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
2392 {
2393 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
355fe088 2394 gimple *def1 = SSA_NAME_DEF_STMT (op1);
12efb1d7
TV
2395 reduction_code = gimple_assign_rhs_code (def1);
2396 }
2397
2398 else
2399 reduction_code = gimple_assign_rhs_code (reduc_stmt);
2400
08dab97a 2401 new_reduction = XCNEW (struct reduction_info);
b8698a0f 2402
08dab97a
RL
2403 new_reduction->reduc_stmt = reduc_stmt;
2404 new_reduction->reduc_phi = phi;
5d1fd1de 2405 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
12efb1d7 2406 new_reduction->reduction_code = reduction_code;
c203e8a7 2407 slot = reduction_list->find_slot (new_reduction, INSERT);
08dab97a
RL
2408 *slot = new_reduction;
2409}
2410
5d1fd1de
JJ
2411/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2412
4a8fb1a1
LC
2413int
2414set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 2415{
4a8fb1a1 2416 struct reduction_info *const red = *slot;
5d1fd1de
JJ
2417 gimple_set_uid (red->reduc_phi, red->reduc_version);
2418 return 1;
2419}
2420
08dab97a
RL
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;
  loop_vec_info simple_inner_loop_info = NULL;
  bool allow_double_reduc = true;

  /* The vectorizer analysis routines used below need the stmt_vec_info
     vector; this also claims gimple_uid for the duration of the analysis
     (released again at the end of this function).  */
  if (!stmt_vec_info_vec.exists ())
    init_stmt_vec_info_vec ();

  simple_loop_info = vect_analyze_loop_form (loop);
  if (simple_loop_info == NULL)
    return;

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      /* A simple induction variable is not a reduction.  */
      if (simple_iv (loop, loop, res, &iv, true))
	continue;

      gimple *reduc_stmt
	= vect_force_simple_reduction (simple_loop_info, phi, true,
				       &double_reduc, true);
      if (!reduc_stmt)
	continue;

      if (double_reduc)
	{
	  /* A double reduction (reduction carried over a loop nest) is
	     handled only for a two-deep nest whose inner loop also has
	     recognizable form; once inner-loop analysis fails, give up on
	     double reductions for the rest of this loop.  */
	  if (!allow_double_reduc
	      || loop->inner->inner != NULL)
	    continue;

	  if (!simple_inner_loop_info)
	    {
	      simple_inner_loop_info = vect_analyze_loop_form (loop->inner);
	      if (!simple_inner_loop_info)
		{
		  allow_double_reduc = false;
		  continue;
		}
	    }

	  /* RES is expected to have exactly one use: the inner loop's
	     header phi of the inner reduction.  */
	  use_operand_p use_p;
	  gimple *inner_stmt;
	  bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
	  gcc_assert (single_use_p);
	  gphi *inner_phi = as_a <gphi *> (inner_stmt);
	  if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
			 &iv, true))
	    continue;

	  gimple *inner_reduc_stmt
	    = vect_force_simple_reduction (simple_inner_loop_info, inner_phi,
					   true, &double_reduc, true);
	  gcc_assert (!double_reduc);
	  if (inner_reduc_stmt == NULL)
	    continue;
	}

      build_new_reduction (reduction_list, reduc_stmt, phi);
    }
  destroy_loop_vec_info (simple_loop_info, true);
  destroy_loop_vec_info (simple_inner_loop_info, true);

  /* Release the claim on gimple_uid.  */
  free_stmt_vec_info_vec ();

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only
     now.  Reset all phi uids first, then stamp the recorded reduction phis
     via set_reduc_phi_uids.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}
2507
2508/* Try to initialize NITER for code generation part. */
2509
2510static bool
2511try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2512{
2513 edge exit = single_dom_exit (loop);
2514
2515 gcc_assert (exit);
2516
2517 /* We need to know # of iterations, and there should be no uses of values
2518 defined inside loop outside of it, unless the values are invariants of
2519 the loop. */
2520 if (!number_of_iterations_exit (loop, exit, niter, false))
2521 {
2522 if (dump_file && (dump_flags & TDF_DETAILS))
2523 fprintf (dump_file, " FAILED: number of iterations not known\n");
2524 return false;
2525 }
2526
2527 return true;
2528}
2529
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  /* Try to get rid of exit phis.  */
  final_value_replacement_loop (loop);

  gather_scalar_reductions (loop, reduction_list);


  /* Every non-virtual value live after the loop must be the result of a
     reduction recorded in REDUCTION_LIST; otherwise the iterations cannot
     be distributed and we fail.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple *reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit: value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       " checking if it a part of reduction pattern: \n");
	    }
	  if (reduction_list->elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the (non-debug) use of VAL inside the loop -- that should
	     be the reduction phi.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  Any header phi that
     is neither a simple iv nor a recognized reduction is a scalar
     dependency between iterations.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
2630
5f40b3cb
ZD
2631/* Detect parallel loops and generate parallel code using libgomp
2632 primitives. Returns true if some loop was parallelized, false
2633 otherwise. */
2634
09489eb8 2635static bool
5f40b3cb
ZD
2636parallelize_loops (void)
2637{
2638 unsigned n_threads = flag_tree_parallelize_loops;
2639 bool changed = false;
2640 struct loop *loop;
e67d7a1e 2641 struct loop *skip_loop = NULL;
5f40b3cb 2642 struct tree_niter_desc niter_desc;
f873b205 2643 struct obstack parloop_obstack;
8adfe01d 2644 HOST_WIDE_INT estimated;
b05e0233 2645 source_location loop_loc;
f873b205 2646
5f40b3cb
ZD
2647 /* Do not parallelize loops in the functions created by parallelization. */
2648 if (parallelized_function_p (cfun->decl))
2649 return false;
8adfe01d
RL
2650 if (cfun->has_nonlocal_label)
2651 return false;
5f40b3cb 2652
f873b205 2653 gcc_obstack_init (&parloop_obstack);
c203e8a7 2654 reduction_info_table_type reduction_list (10);
a509ebb5 2655
f0bd40b1 2656 FOR_EACH_LOOP (loop, 0)
5f40b3cb 2657 {
e67d7a1e
TV
2658 if (loop == skip_loop)
2659 {
2660 if (dump_file && (dump_flags & TDF_DETAILS))
2661 fprintf (dump_file,
2662 "Skipping loop %d as inner loop of parallelized loop\n",
2663 loop->num);
2664
2665 skip_loop = loop->inner;
2666 continue;
2667 }
2668 else
2669 skip_loop = NULL;
2670
4a8fb1a1 2671 reduction_list.empty ();
48710229
RL
2672 if (dump_file && (dump_flags & TDF_DETAILS))
2673 {
2674 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2675 if (loop->inner)
2676 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2677 else
2678 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2679 }
b8698a0f 2680
48710229 2681 /* If we use autopar in graphite pass, we use its marked dependency
87d4d0ee
SP
2682 checking results. */
2683 if (flag_loop_parallelize_all && !loop->can_be_parallel)
48710229
RL
2684 {
2685 if (dump_file && (dump_flags & TDF_DETAILS))
2686 fprintf (dump_file, "loop is not parallel according to graphite\n");
87d4d0ee 2687 continue;
48710229 2688 }
87d4d0ee 2689
48710229
RL
2690 if (!single_dom_exit (loop))
2691 {
b8698a0f 2692
48710229
RL
2693 if (dump_file && (dump_flags & TDF_DETAILS))
2694 fprintf (dump_file, "loop is !single_dom_exit\n");
b8698a0f 2695
08dab97a 2696 continue;
48710229 2697 }
08dab97a
RL
2698
2699 if (/* And of course, the loop must be parallelizable. */
2700 !can_duplicate_loop_p (loop)
1d4af1e8 2701 || loop_has_blocks_with_irreducible_flag (loop)
8adfe01d 2702 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
9857228c 2703 /* FIXME: the check for vector phi nodes could be removed. */
69958396 2704 || loop_has_vector_phi_nodes (loop))
08dab97a 2705 continue;
e5b332cd 2706
652c4c71 2707 estimated = estimated_stmt_executions_int (loop);
e5b332cd
RG
2708 if (estimated == -1)
2709 estimated = max_stmt_executions_int (loop);
87d4d0ee 2710 /* FIXME: Bypass this check as graphite doesn't update the
e5b332cd 2711 count and frequency correctly now. */
87d4d0ee 2712 if (!flag_loop_parallelize_all
e5b332cd
RG
2713 && ((estimated != -1
2714 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
87d4d0ee
SP
2715 /* Do not bother with loops in cold areas. */
2716 || optimize_loop_nest_for_size_p (loop)))
08dab97a 2717 continue;
b8698a0f 2718
08dab97a
RL
2719 if (!try_get_loop_niter (loop, &niter_desc))
2720 continue;
2721
c203e8a7 2722 if (!try_create_reduction_list (loop, &reduction_list))
08dab97a
RL
2723 continue;
2724
f873b205
LB
2725 if (!flag_loop_parallelize_all
2726 && !loop_parallel_p (loop, &parloop_obstack))
5f40b3cb
ZD
2727 continue;
2728
2729 changed = true;
e67d7a1e 2730 skip_loop = loop->inner;
48710229
RL
2731 if (dump_file && (dump_flags & TDF_DETAILS))
2732 {
48710229 2733 if (loop->inner)
8adfe01d 2734 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
48710229 2735 else
8adfe01d
RL
2736 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2737 loop_loc = find_loop_location (loop);
b05e0233 2738 if (loop_loc != UNKNOWN_LOCATION)
8adfe01d 2739 fprintf (dump_file, "\nloop at %s:%d: ",
b05e0233 2740 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
b8698a0f 2741 }
c203e8a7 2742 gen_parallel_loop (loop, &reduction_list,
08dab97a 2743 n_threads, &niter_desc);
5f40b3cb
ZD
2744 }
2745
f873b205 2746 obstack_free (&parloop_obstack, NULL);
6b8ed145
RG
2747
2748 /* Parallelization will cause new function calls to be inserted through
d086d311
RG
2749 which local variables will escape. Reset the points-to solution
2750 for ESCAPED. */
6b8ed145 2751 if (changed)
d086d311 2752 pt_solution_reset (&cfun->gimple_df->escaped);
6b8ed145 2753
5f40b3cb
ZD
2754 return changed;
2755}
2756
c1bf2a39
AM
2757/* Parallelization. */
2758
c1bf2a39
AM
2759namespace {
2760
2761const pass_data pass_data_parallelize_loops =
2762{
2763 GIMPLE_PASS, /* type */
2764 "parloops", /* name */
2765 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
2766 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2767 ( PROP_cfg | PROP_ssa ), /* properties_required */
2768 0, /* properties_provided */
2769 0, /* properties_destroyed */
2770 0, /* todo_flags_start */
3bea341f 2771 0, /* todo_flags_finish */
c1bf2a39
AM
2772};
2773
2774class pass_parallelize_loops : public gimple_opt_pass
2775{
2776public:
2777 pass_parallelize_loops (gcc::context *ctxt)
2778 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2779 {}
2780
2781 /* opt_pass methods: */
1a3d085c 2782 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
be55bfe6 2783 virtual unsigned int execute (function *);
c1bf2a39
AM
2784
2785}; // class pass_parallelize_loops
2786
be55bfe6
TS
2787unsigned
2788pass_parallelize_loops::execute (function *fun)
2789{
2790 if (number_of_loops (fun) <= 1)
2791 return 0;
2792
2793 if (parallelize_loops ())
18751894
TV
2794 {
2795 fun->curr_properties &= ~(PROP_gimple_eomp);
e67d7a1e 2796
b2b29377 2797 checking_verify_loop_structure ();
e67d7a1e 2798
18751894
TV
2799 return TODO_update_ssa;
2800 }
2801
be55bfe6
TS
2802 return 0;
2803}
2804
c1bf2a39
AM
2805} // anon namespace
2806
2807gimple_opt_pass *
2808make_pass_parallelize_loops (gcc::context *ctxt)
2809{
2810 return new pass_parallelize_loops (ctxt);
2811}