]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
re PR tree-optimization/81573 (wrong code at -O3 on x86_64-linux-gnu)
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
cbe34bb5 2 Copyright (C) 2006-2017 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
c7131fb2 25#include "backend.h"
40e23961 26#include "tree.h"
c7131fb2 27#include "gimple.h"
957060b5
AM
28#include "cfghooks.h"
29#include "tree-pass.h"
c7131fb2 30#include "ssa.h"
957060b5
AM
31#include "cgraph.h"
32#include "gimple-pretty-print.h"
c7131fb2 33#include "fold-const.h"
45b0be94 34#include "gimplify.h"
5be5c238 35#include "gimple-iterator.h"
18f429e2 36#include "gimplify-me.h"
5be5c238 37#include "gimple-walk.h"
d8a2d370
DN
38#include "stor-layout.h"
39#include "tree-nested.h"
442b4905 40#include "tree-cfg.h"
e28030cf
AM
41#include "tree-ssa-loop-ivopts.h"
42#include "tree-ssa-loop-manip.h"
43#include "tree-ssa-loop-niter.h"
442b4905
AM
44#include "tree-ssa-loop.h"
45#include "tree-into-ssa.h"
5f40b3cb 46#include "cfgloop.h"
1bd6497c 47#include "tree-scalar-evolution.h"
5f40b3cb 48#include "langhooks.h"
a509ebb5 49#include "tree-vectorizer.h"
4a8fb1a1 50#include "tree-hasher.h"
c1bf2a39 51#include "tree-parloops.h"
629b3d75 52#include "omp-general.h"
0645c1a2 53#include "omp-low.h"
7c82d827 54#include "tree-ssa.h"
f7f18684 55#include "params.h"
1f600fea 56#include "params-enum.h"
61d9c527
TV
57#include "tree-ssa-alias.h"
58#include "tree-eh.h"
59#include "gomp-constants.h"
60#include "tree-dfa.h"
5f40b3cb
ZD
61
62/* This pass tries to distribute iterations of loops into several threads.
63 The implementation is straightforward -- for each loop we test whether its
64 iterations are independent, and if it is the case (and some additional
65 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
66 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
67 machinery do its job.
b8698a0f 68
5f40b3cb
ZD
69 The most of the complexity is in bringing the code into shape expected
70 by the omp expanders:
726a989a
RB
71 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
72 variable and that the exit test is at the start of the loop body
73 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
74 variables by accesses through pointers, and breaking up ssa chains
75 by storing the values incoming to the parallelized loop to a structure
76 passed to the new function as an argument (something similar is done
77 in omp gimplification, unfortunately only a small part of the code
78 can be shared).
79
80 TODO:
81 -- if there are several parallelizable loops in a function, it may be
82 possible to generate the threads just once (using synchronization to
83 ensure that cross-loop dependences are obeyed).
70837b71
RL
84 -- handling of common reduction patterns for outer loops.
85
86 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 87/*
a509ebb5 88 Reduction handling:
8a9ecffd 89 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 90 The code transformation will be introduced by an example.
b8698a0f
L
91
92
a509ebb5
RL
93parloop
94{
95 int sum=1;
96
0eb7e7aa 97 for (i = 0; i < N; i++)
a509ebb5
RL
98 {
99 x[i] = i + 3;
100 sum+=x[i];
101 }
102}
103
0eb7e7aa 104gimple-like code:
a509ebb5
RL
105header_bb:
106
0eb7e7aa
RL
107 # sum_29 = PHI <sum_11(5), 1(3)>
108 # i_28 = PHI <i_12(5), 0(3)>
109 D.1795_8 = i_28 + 3;
110 x[i_28] = D.1795_8;
111 sum_11 = D.1795_8 + sum_29;
112 i_12 = i_28 + 1;
113 if (N_6(D) > i_12)
114 goto header_bb;
115
a509ebb5
RL
116
117exit_bb:
118
0eb7e7aa
RL
119 # sum_21 = PHI <sum_11(4)>
120 printf (&"%d"[0], sum_21);
a509ebb5
RL
121
122
123after reduction transformation (only relevant parts):
124
125parloop
126{
127
128....
129
0eb7e7aa 130
fa10beec 131 # Storing the initial value given by the user. #
0eb7e7aa 132
ae0bce62 133 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
134
135 #pragma omp parallel num_threads(4)
a509ebb5 136
0eb7e7aa 137 #pragma omp for schedule(static)
ae0bce62
RL
138
139 # The neutral element corresponding to the particular
140 reduction's operation, e.g. 0 for PLUS_EXPR,
141 1 for MULT_EXPR, etc. replaces the user's initial value. #
142
143 # sum.27_29 = PHI <sum.27_11, 0>
144
0eb7e7aa 145 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 146
726a989a 147 GIMPLE_OMP_CONTINUE
a509ebb5 148
0eb7e7aa
RL
149 # Adding this reduction phi is done at create_phi_for_local_result() #
150 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 151 GIMPLE_OMP_RETURN
b8698a0f
L
152
153 # Creating the atomic operation is done at
0eb7e7aa 154 create_call_for_reduction_1() #
a509ebb5 155
0eb7e7aa
RL
156 #pragma omp atomic_load
157 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
158 D.1840_60 = sum.27_56 + D.1839_59;
159 #pragma omp atomic_store (D.1840_60);
b8698a0f 160
726a989a 161 GIMPLE_OMP_RETURN
b8698a0f 162
0eb7e7aa
RL
163 # collecting the result after the join of the threads is done at
164 create_loads_for_reductions().
ae0bce62
RL
165 The value computed by the threads is loaded from the
166 shared struct. #
167
b8698a0f 168
0eb7e7aa 169 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 170 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
171 sum_43 = D.1795_41 + sum_37;
172
173 exit bb:
174 # sum_21 = PHI <sum_43, sum_26>
175 printf (&"%d"[0], sum_21);
176
177...
178
a509ebb5
RL
179}
180
181*/
182
5f40b3cb
ZD
183/* Minimal number of iterations of a loop that should be executed in each
184 thread. */
185#define MIN_PER_THREAD 100
186
b8698a0f 187/* Element of the hashtable, representing a
a509ebb5
RL
188 reduction in the current loop. */
189struct reduction_info
190{
355fe088
TS
191 gimple *reduc_stmt; /* reduction statement. */
192 gimple *reduc_phi; /* The phi node defining the reduction. */
726a989a 193 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
194 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
195 result. */
538dd0b7 196 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 197 of the reduction variable when existing the loop. */
ae0bce62 198 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 199 tree field; /* the name of the field in the parloop data structure intended for reduction. */
61d9c527
TV
200 tree reduc_addr; /* The address of the reduction variable for
201 openacc reductions. */
a509ebb5 202 tree init; /* reduction initialization value. */
538dd0b7 203 gphi *new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
204 will be passed to the atomic operation. Represents
205 the local result each thread computed for the reduction
206 operation. */
207};
208
4a8fb1a1 209/* Reduction info hashtable helpers. */
a509ebb5 210
95fbe13e 211struct reduction_hasher : free_ptr_hash <reduction_info>
a509ebb5 212{
67f58944
TS
213 static inline hashval_t hash (const reduction_info *);
214 static inline bool equal (const reduction_info *, const reduction_info *);
4a8fb1a1
LC
215};
216
217/* Equality and hash functions for hashtab code. */
a509ebb5 218
4a8fb1a1 219inline bool
67f58944 220reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
4a8fb1a1 221{
a509ebb5
RL
222 return (a->reduc_phi == b->reduc_phi);
223}
224
4a8fb1a1 225inline hashval_t
67f58944 226reduction_hasher::hash (const reduction_info *a)
a509ebb5 227{
5d1fd1de 228 return a->reduc_version;
a509ebb5
RL
229}
230
c203e8a7 231typedef hash_table<reduction_hasher> reduction_info_table_type;
4a8fb1a1
LC
232
233
a509ebb5 234static struct reduction_info *
355fe088 235reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
a509ebb5
RL
236{
237 struct reduction_info tmpred, *red;
238
c203e8a7 239 if (reduction_list->elements () == 0 || phi == NULL)
a509ebb5
RL
240 return NULL;
241
fdce493d
TV
242 if (gimple_uid (phi) == (unsigned int)-1
243 || gimple_uid (phi) == 0)
244 return NULL;
245
a509ebb5 246 tmpred.reduc_phi = phi;
5d1fd1de 247 tmpred.reduc_version = gimple_uid (phi);
c203e8a7 248 red = reduction_list->find (&tmpred);
fdce493d 249 gcc_assert (red == NULL || red->reduc_phi == phi);
a509ebb5
RL
250
251 return red;
252}
253
5f40b3cb
ZD
254/* Element of hashtable of names to copy. */
255
256struct name_to_copy_elt
257{
258 unsigned version; /* The version of the name to copy. */
259 tree new_name; /* The new name used in the copy. */
260 tree field; /* The field of the structure used to pass the
261 value. */
262};
263
4a8fb1a1 264/* Name copies hashtable helpers. */
5f40b3cb 265
95fbe13e 266struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
5f40b3cb 267{
67f58944
TS
268 static inline hashval_t hash (const name_to_copy_elt *);
269 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
4a8fb1a1
LC
270};
271
272/* Equality and hash functions for hashtab code. */
5f40b3cb 273
4a8fb1a1 274inline bool
67f58944 275name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
4a8fb1a1 276{
5f40b3cb
ZD
277 return a->version == b->version;
278}
279
4a8fb1a1 280inline hashval_t
67f58944 281name_to_copy_hasher::hash (const name_to_copy_elt *a)
5f40b3cb 282{
5f40b3cb
ZD
283 return (hashval_t) a->version;
284}
285
c203e8a7 286typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
4a8fb1a1 287
b305e3da
SP
288/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
289 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
290 represents the denominator for every element in the matrix. */
291typedef struct lambda_trans_matrix_s
292{
293 lambda_matrix matrix;
294 int rowsize;
295 int colsize;
296 int denominator;
297} *lambda_trans_matrix;
298#define LTM_MATRIX(T) ((T)->matrix)
299#define LTM_ROWSIZE(T) ((T)->rowsize)
300#define LTM_COLSIZE(T) ((T)->colsize)
301#define LTM_DENOMINATOR(T) ((T)->denominator)
302
303/* Allocate a new transformation matrix. */
304
305static lambda_trans_matrix
306lambda_trans_matrix_new (int colsize, int rowsize,
307 struct obstack * lambda_obstack)
308{
309 lambda_trans_matrix ret;
310
311 ret = (lambda_trans_matrix)
312 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
313 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
314 LTM_ROWSIZE (ret) = rowsize;
315 LTM_COLSIZE (ret) = colsize;
316 LTM_DENOMINATOR (ret) = 1;
317 return ret;
318}
319
320/* Multiply a vector VEC by a matrix MAT.
321 MAT is an M*N matrix, and VEC is a vector with length N. The result
322 is stored in DEST which must be a vector of length M. */
323
324static void
325lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
326 lambda_vector vec, lambda_vector dest)
327{
328 int i, j;
329
330 lambda_vector_clear (dest, m);
331 for (i = 0; i < m; i++)
332 for (j = 0; j < n; j++)
333 dest[i] += matrix[i][j] * vec[j];
334}
335
336/* Return true if TRANS is a legal transformation matrix that respects
337 the dependence vectors in DISTS and DIRS. The conservative answer
338 is false.
339
340 "Wolfe proves that a unimodular transformation represented by the
341 matrix T is legal when applied to a loop nest with a set of
342 lexicographically non-negative distance vectors RDG if and only if
343 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
344 i.e.: if and only if it transforms the lexicographically positive
345 distance vectors to lexicographically positive vectors. Note that
346 a unimodular matrix must transform the zero vector (and only it) to
347 the zero vector." S.Muchnick. */
348
349static bool
350lambda_transform_legal_p (lambda_trans_matrix trans,
351 int nb_loops,
9771b263 352 vec<ddr_p> dependence_relations)
b305e3da
SP
353{
354 unsigned int i, j;
355 lambda_vector distres;
356 struct data_dependence_relation *ddr;
357
358 gcc_assert (LTM_COLSIZE (trans) == nb_loops
359 && LTM_ROWSIZE (trans) == nb_loops);
360
361 /* When there are no dependences, the transformation is correct. */
9771b263 362 if (dependence_relations.length () == 0)
b305e3da
SP
363 return true;
364
9771b263 365 ddr = dependence_relations[0];
b305e3da
SP
366 if (ddr == NULL)
367 return true;
368
369 /* When there is an unknown relation in the dependence_relations, we
370 know that it is no worth looking at this loop nest: give up. */
371 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
372 return false;
373
374 distres = lambda_vector_new (nb_loops);
375
376 /* For each distance vector in the dependence graph. */
9771b263 377 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
b305e3da
SP
378 {
379 /* Don't care about relations for which we know that there is no
380 dependence, nor about read-read (aka. output-dependences):
381 these data accesses can happen in any order. */
382 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
383 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
384 continue;
385
386 /* Conservatively answer: "this transformation is not valid". */
387 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
388 return false;
389
390 /* If the dependence could not be captured by a distance vector,
391 conservatively answer that the transform is not valid. */
392 if (DDR_NUM_DIST_VECTS (ddr) == 0)
393 return false;
394
395 /* Compute trans.dist_vect */
396 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
397 {
398 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
399 DDR_DIST_VECT (ddr, j), distres);
400
401 if (!lambda_vector_lexico_pos (distres, nb_loops))
402 return false;
403 }
404 }
405 return true;
406}
08dab97a
RL
407
408/* Data dependency analysis. Returns true if the iterations of LOOP
409 are independent on each other (that is, if we can execute them
410 in parallel). */
5f40b3cb
ZD
411
412static bool
f873b205 413loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 414{
9771b263
DN
415 vec<ddr_p> dependence_relations;
416 vec<data_reference_p> datarefs;
5f40b3cb
ZD
417 lambda_trans_matrix trans;
418 bool ret = false;
5f40b3cb
ZD
419
420 if (dump_file && (dump_flags & TDF_DETAILS))
48710229
RL
421 {
422 fprintf (dump_file, "Considering loop %d\n", loop->num);
423 if (!loop->inner)
424 fprintf (dump_file, "loop is innermost\n");
b8698a0f 425 else
48710229
RL
426 fprintf (dump_file, "loop NOT innermost\n");
427 }
5f40b3cb 428
5f40b3cb
ZD
429 /* Check for problems with dependences. If the loop can be reversed,
430 the iterations are independent. */
00f96dc9 431 auto_vec<loop_p, 3> loop_nest;
9771b263 432 datarefs.create (10);
07687835 433 dependence_relations.create (100);
9ca3d00e
AB
434 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
435 &dependence_relations))
436 {
437 if (dump_file && (dump_flags & TDF_DETAILS))
438 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
439 ret = false;
440 goto end;
441 }
5f40b3cb
ZD
442 if (dump_file && (dump_flags & TDF_DETAILS))
443 dump_data_dependence_relations (dump_file, dependence_relations);
444
f873b205 445 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
5f40b3cb
ZD
446 LTM_MATRIX (trans)[0][0] = -1;
447
448 if (lambda_transform_legal_p (trans, 1, dependence_relations))
449 {
450 ret = true;
451 if (dump_file && (dump_flags & TDF_DETAILS))
452 fprintf (dump_file, " SUCCESS: may be parallelized\n");
453 }
454 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
455 fprintf (dump_file,
456 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 457
9ca3d00e 458 end:
5f40b3cb
ZD
459 free_dependence_relations (dependence_relations);
460 free_data_refs (datarefs);
461
462 return ret;
463}
464
1d4af1e8
SP
465/* Return true when LOOP contains basic blocks marked with the
466 BB_IRREDUCIBLE_LOOP flag. */
467
468static inline bool
469loop_has_blocks_with_irreducible_flag (struct loop *loop)
470{
471 unsigned i;
472 basic_block *bbs = get_loop_body_in_dom_order (loop);
473 bool res = true;
474
475 for (i = 0; i < loop->num_nodes; i++)
476 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
477 goto end;
478
479 res = false;
480 end:
481 free (bbs);
482 return res;
483}
484
8a171a59 485/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 486 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 487 to their addresses that can be reused. The address of OBJ is known to
cba1eb61
JJ
488 be invariant in the whole function. Other needed statements are placed
489 right before GSI. */
5f40b3cb
ZD
490
491static tree
4a8fb1a1 492take_address_of (tree obj, tree type, edge entry,
c203e8a7 493 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
5f40b3cb 494{
8a171a59 495 int uid;
83d5977e 496 tree *var_p, name, addr;
538dd0b7 497 gassign *stmt;
726a989a 498 gimple_seq stmts;
5f40b3cb 499
8a171a59
ZD
500 /* Since the address of OBJ is invariant, the trees may be shared.
501 Avoid rewriting unrelated parts of the code. */
502 obj = unshare_expr (obj);
503 for (var_p = &obj;
504 handled_component_p (*var_p);
505 var_p = &TREE_OPERAND (*var_p, 0))
506 continue;
8a171a59 507
c9a410f0
RG
508 /* Canonicalize the access to base on a MEM_REF. */
509 if (DECL_P (*var_p))
510 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
511
512 /* Assign a canonical SSA name to the address of the base decl used
513 in the address and share it for all accesses and addresses based
514 on it. */
515 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
84baa4b9
TS
516 int_tree_map elt;
517 elt.uid = uid;
518 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
519 if (!slot->to)
5f40b3cb 520 {
cba1eb61
JJ
521 if (gsi == NULL)
522 return NULL;
c9a410f0 523 addr = TREE_OPERAND (*var_p, 0);
29b89442
JJ
524 const char *obj_name
525 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
526 if (obj_name)
527 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
528 else
b731b390 529 name = make_ssa_name (TREE_TYPE (addr));
83d5977e 530 stmt = gimple_build_assign (name, addr);
726a989a 531 gsi_insert_on_edge_immediate (entry, stmt);
5f40b3cb 532
84baa4b9
TS
533 slot->uid = uid;
534 slot->to = name;
5f40b3cb 535 }
8a171a59 536 else
84baa4b9 537 name = slot->to;
5f40b3cb 538
c9a410f0
RG
539 /* Express the address in terms of the canonical SSA name. */
540 TREE_OPERAND (*var_p, 0) = name;
cba1eb61
JJ
541 if (gsi == NULL)
542 return build_fold_addr_expr_with_type (obj, type);
543
aa00059c 544 name = force_gimple_operand (build_addr (obj),
c9a410f0
RG
545 &stmts, true, NULL_TREE);
546 if (!gimple_seq_empty_p (stmts))
cba1eb61 547 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 548
c9a410f0 549 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 550 {
726a989a 551 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 552 NULL_TREE);
726a989a 553 if (!gimple_seq_empty_p (stmts))
cba1eb61 554 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 555 }
5f40b3cb
ZD
556
557 return name;
558}
559
12efb1d7 560static tree
355fe088 561reduc_stmt_res (gimple *stmt)
12efb1d7
TV
562{
563 return (gimple_code (stmt) == GIMPLE_PHI
564 ? gimple_phi_result (stmt)
565 : gimple_assign_lhs (stmt));
566}
567
a509ebb5 568/* Callback for htab_traverse. Create the initialization statement
b8698a0f 569 for reduction described in SLOT, and place it at the preheader of
a509ebb5
RL
570 the loop described in DATA. */
571
4a8fb1a1
LC
572int
573initialize_reductions (reduction_info **slot, struct loop *loop)
a509ebb5 574{
f2c9f71d
TS
575 tree init;
576 tree type, arg;
a509ebb5
RL
577 edge e;
578
4a8fb1a1 579 struct reduction_info *const reduc = *slot;
a509ebb5 580
b8698a0f 581 /* Create initialization in preheader:
a509ebb5
RL
582 reduction_variable = initialization value of reduction. */
583
b8698a0f 584 /* In the phi node at the header, replace the argument coming
a509ebb5
RL
585 from the preheader with the reduction initialization value. */
586
f2c9f71d 587 /* Initialize the reduction. */
a509ebb5 588 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
f2c9f71d
TS
589 init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
590 reduc->reduction_code, type);
a509ebb5
RL
591 reduc->init = init;
592
b8698a0f
L
593 /* Replace the argument representing the initialization value
594 with the initialization value for the reduction (neutral
595 element for the particular operation, e.g. 0 for PLUS_EXPR,
596 1 for MULT_EXPR, etc).
597 Keep the old value in a new variable "reduction_initial",
598 that will be taken in consideration after the parallel
0eb7e7aa 599 computing is done. */
a509ebb5
RL
600
601 e = loop_preheader_edge (loop);
602 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
603 /* Create new variable to hold the initial value. */
a509ebb5 604
a509ebb5 605 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 606 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 607 reduc->initial_value = arg;
a509ebb5
RL
608 return 1;
609}
5f40b3cb
ZD
610
611struct elv_data
612{
726a989a 613 struct walk_stmt_info info;
9f9f72aa 614 edge entry;
c203e8a7 615 int_tree_htab_type *decl_address;
cba1eb61 616 gimple_stmt_iterator *gsi;
5f40b3cb 617 bool changed;
cba1eb61 618 bool reset;
5f40b3cb
ZD
619};
620
9f9f72aa
AP
621/* Eliminates references to local variables in *TP out of the single
622 entry single exit region starting at DTA->ENTRY.
623 DECL_ADDRESS contains addresses of the references that had their
624 address taken already. If the expression is changed, CHANGED is
625 set to true. Callback for walk_tree. */
a509ebb5 626
5f40b3cb 627static tree
8a171a59 628eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 629{
3d9a9f94 630 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 631 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
632
633 if (DECL_P (t))
634 {
635 *walk_subtrees = 0;
636
637 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
638 return NULL_TREE;
639
640 type = TREE_TYPE (t);
641 addr_type = build_pointer_type (type);
cba1eb61
JJ
642 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
643 dta->gsi);
644 if (dta->gsi == NULL && addr == NULL_TREE)
645 {
646 dta->reset = true;
647 return NULL_TREE;
648 }
649
70f34814 650 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
651
652 dta->changed = true;
653 return NULL_TREE;
654 }
655
656 if (TREE_CODE (t) == ADDR_EXPR)
657 {
8a171a59
ZD
658 /* ADDR_EXPR may appear in two contexts:
659 -- as a gimple operand, when the address taken is a function invariant
660 -- as gimple rhs, when the resulting address in not a function
661 invariant
662 We do not need to do anything special in the latter case (the base of
663 the memory reference whose address is taken may be replaced in the
664 DECL_P case). The former case is more complicated, as we need to
665 ensure that the new address is still a gimple operand. Thus, it
666 is not sufficient to replace just the base of the memory reference --
667 we need to move the whole computation of the address out of the
668 loop. */
669 if (!is_gimple_val (t))
5f40b3cb
ZD
670 return NULL_TREE;
671
672 *walk_subtrees = 0;
8a171a59
ZD
673 obj = TREE_OPERAND (t, 0);
674 var = get_base_address (obj);
675 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
676 return NULL_TREE;
677
678 addr_type = TREE_TYPE (t);
cba1eb61
JJ
679 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
680 dta->gsi);
681 if (dta->gsi == NULL && addr == NULL_TREE)
682 {
683 dta->reset = true;
684 return NULL_TREE;
685 }
5f40b3cb
ZD
686 *tp = addr;
687
688 dta->changed = true;
689 return NULL_TREE;
690 }
691
726a989a 692 if (!EXPR_P (t))
5f40b3cb
ZD
693 *walk_subtrees = 0;
694
695 return NULL_TREE;
696}
697
cba1eb61 698/* Moves the references to local variables in STMT at *GSI out of the single
9f9f72aa
AP
699 entry single exit region starting at ENTRY. DECL_ADDRESS contains
700 addresses of the references that had their address taken
701 already. */
5f40b3cb
ZD
702
703static void
cba1eb61 704eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
c203e8a7 705 int_tree_htab_type *decl_address)
5f40b3cb
ZD
706{
707 struct elv_data dta;
355fe088 708 gimple *stmt = gsi_stmt (*gsi);
5f40b3cb 709
726a989a 710 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 711 dta.entry = entry;
5f40b3cb
ZD
712 dta.decl_address = decl_address;
713 dta.changed = false;
cba1eb61 714 dta.reset = false;
5f40b3cb 715
b5b8b0ac 716 if (gimple_debug_bind_p (stmt))
cba1eb61
JJ
717 {
718 dta.gsi = NULL;
719 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
720 eliminate_local_variables_1, &dta.info, NULL);
721 if (dta.reset)
722 {
723 gimple_debug_bind_reset_value (stmt);
724 dta.changed = true;
725 }
726 }
29b89442
JJ
727 else if (gimple_clobber_p (stmt))
728 {
42fb90d7 729 unlink_stmt_vdef (stmt);
29b89442
JJ
730 stmt = gimple_build_nop ();
731 gsi_replace (gsi, stmt, false);
732 dta.changed = true;
733 }
b5b8b0ac 734 else
cba1eb61
JJ
735 {
736 dta.gsi = gsi;
737 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
738 }
5f40b3cb
ZD
739
740 if (dta.changed)
741 update_stmt (stmt);
742}
743
9f9f72aa
AP
744/* Eliminates the references to local variables from the single entry
745 single exit region between the ENTRY and EXIT edges.
b8698a0f 746
a509ebb5 747 This includes:
b8698a0f
L
748 1) Taking address of a local variable -- these are moved out of the
749 region (and temporary variable is created to hold the address if
a509ebb5 750 necessary).
9f9f72aa 751
5f40b3cb 752 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 753 references. */
5f40b3cb
ZD
754
755static void
9f9f72aa 756eliminate_local_variables (edge entry, edge exit)
5f40b3cb 757{
9f9f72aa 758 basic_block bb;
00f96dc9 759 auto_vec<basic_block, 3> body;
5f40b3cb 760 unsigned i;
726a989a 761 gimple_stmt_iterator gsi;
cba1eb61 762 bool has_debug_stmt = false;
c203e8a7 763 int_tree_htab_type decl_address (10);
9f9f72aa
AP
764 basic_block entry_bb = entry->src;
765 basic_block exit_bb = exit->dest;
5f40b3cb 766
9f9f72aa 767 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 768
9771b263 769 FOR_EACH_VEC_ELT (body, i, bb)
9f9f72aa 770 if (bb != entry_bb && bb != exit_bb)
6b37bdaf
PP
771 {
772 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
773 if (is_gimple_debug (gsi_stmt (gsi)))
774 {
775 if (gimple_debug_bind_p (gsi_stmt (gsi)))
776 has_debug_stmt = true;
777 }
778 else
779 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
780 }
cba1eb61
JJ
781
782 if (has_debug_stmt)
9771b263 783 FOR_EACH_VEC_ELT (body, i, bb)
cba1eb61
JJ
784 if (bb != entry_bb && bb != exit_bb)
785 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
786 if (gimple_debug_bind_p (gsi_stmt (gsi)))
c203e8a7 787 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
9f9f72aa
AP
788}
789
790/* Returns true if expression EXPR is not defined between ENTRY and
791 EXIT, i.e. if all its operands are defined outside of the region. */
792
793static bool
794expr_invariant_in_region_p (edge entry, edge exit, tree expr)
795{
796 basic_block entry_bb = entry->src;
797 basic_block exit_bb = exit->dest;
798 basic_block def_bb;
9f9f72aa
AP
799
800 if (is_gimple_min_invariant (expr))
801 return true;
802
803 if (TREE_CODE (expr) == SSA_NAME)
804 {
726a989a 805 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
806 if (def_bb
807 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
808 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
809 return false;
810
811 return true;
812 }
813
726a989a 814 return false;
5f40b3cb
ZD
815}
816
817/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
818 The copies are stored to NAME_COPIES, if NAME was already duplicated,
819 its duplicate stored in NAME_COPIES is returned.
b8698a0f 820
5f40b3cb
ZD
821 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
822 duplicated, storing the copies in DECL_COPIES. */
823
824static tree
c203e8a7
TS
825separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
826 int_tree_htab_type *decl_copies,
827 bool copy_name_p)
5f40b3cb
ZD
828{
829 tree copy, var, var_copy;
830 unsigned idx, uid, nuid;
84baa4b9 831 struct int_tree_map ielt;
5f40b3cb 832 struct name_to_copy_elt elt, *nelt;
4a8fb1a1 833 name_to_copy_elt **slot;
84baa4b9 834 int_tree_map *dslot;
5f40b3cb
ZD
835
836 if (TREE_CODE (name) != SSA_NAME)
837 return name;
838
839 idx = SSA_NAME_VERSION (name);
840 elt.version = idx;
c203e8a7
TS
841 slot = name_copies->find_slot_with_hash (&elt, idx,
842 copy_name_p ? INSERT : NO_INSERT);
5f40b3cb 843 if (slot && *slot)
4a8fb1a1 844 return (*slot)->new_name;
5f40b3cb 845
70b5e7dc
RG
846 if (copy_name_p)
847 {
848 copy = duplicate_ssa_name (name, NULL);
849 nelt = XNEW (struct name_to_copy_elt);
850 nelt->version = idx;
851 nelt->new_name = copy;
852 nelt->field = NULL_TREE;
853 *slot = nelt;
854 }
855 else
856 {
857 gcc_assert (!slot);
858 copy = name;
859 }
860
5f40b3cb 861 var = SSA_NAME_VAR (name);
70b5e7dc
RG
862 if (!var)
863 return copy;
864
5f40b3cb
ZD
865 uid = DECL_UID (var);
866 ielt.uid = uid;
84baa4b9
TS
867 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
868 if (!dslot->to)
5f40b3cb
ZD
869 {
870 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 871 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
84baa4b9
TS
872 dslot->uid = uid;
873 dslot->to = var_copy;
5f40b3cb
ZD
874
875 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 876 it again. */
5f40b3cb
ZD
877 nuid = DECL_UID (var_copy);
878 ielt.uid = nuid;
84baa4b9
TS
879 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
880 gcc_assert (!dslot->to);
881 dslot->uid = nuid;
882 dslot->to = var_copy;
5f40b3cb
ZD
883 }
884 else
84baa4b9 885 var_copy = dslot->to;
5f40b3cb 886
b2ec94d4 887 replace_ssa_name_symbol (copy, var_copy);
5f40b3cb
ZD
888 return copy;
889}
890
9f9f72aa
AP
891/* Finds the ssa names used in STMT that are defined outside the
892 region between ENTRY and EXIT and replaces such ssa names with
893 their duplicates. The duplicates are stored to NAME_COPIES. Base
894 decls of all ssa names used in STMT (including those defined in
895 LOOP) are replaced with the new temporary variables; the
896 replacement decls are stored in DECL_COPIES. */
5f40b3cb
ZD
897
898static void
355fe088 899separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
c203e8a7
TS
900 name_to_copy_table_type *name_copies,
901 int_tree_htab_type *decl_copies)
5f40b3cb
ZD
902{
903 use_operand_p use;
904 def_operand_p def;
905 ssa_op_iter oi;
906 tree name, copy;
907 bool copy_name_p;
908
5f40b3cb 909 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
910 {
911 name = DEF_FROM_PTR (def);
912 gcc_assert (TREE_CODE (name) == SSA_NAME);
9f9f72aa
AP
913 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
914 false);
a509ebb5
RL
915 gcc_assert (copy == name);
916 }
5f40b3cb
ZD
917
918 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
919 {
920 name = USE_FROM_PTR (use);
921 if (TREE_CODE (name) != SSA_NAME)
922 continue;
923
9f9f72aa
AP
924 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
925 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
926 copy_name_p);
a509ebb5
RL
927 SET_USE (use, copy);
928 }
5f40b3cb
ZD
929}
930
b5b8b0ac
AO
931/* Finds the ssa names used in STMT that are defined outside the
932 region between ENTRY and EXIT and replaces such ssa names with
933 their duplicates. The duplicates are stored to NAME_COPIES. Base
934 decls of all ssa names used in STMT (including those defined in
935 LOOP) are replaced with the new temporary variables; the
936 replacement decls are stored in DECL_COPIES. */
937
938static bool
355fe088 939separate_decls_in_region_debug (gimple *stmt,
c203e8a7
TS
940 name_to_copy_table_type *name_copies,
941 int_tree_htab_type *decl_copies)
b5b8b0ac
AO
942{
943 use_operand_p use;
944 ssa_op_iter oi;
945 tree var, name;
946 struct int_tree_map ielt;
947 struct name_to_copy_elt elt;
4a8fb1a1 948 name_to_copy_elt **slot;
84baa4b9 949 int_tree_map *dslot;
b5b8b0ac 950
ddb555ed
JJ
951 if (gimple_debug_bind_p (stmt))
952 var = gimple_debug_bind_get_var (stmt);
953 else if (gimple_debug_source_bind_p (stmt))
954 var = gimple_debug_source_bind_get_var (stmt);
955 else
956 return true;
598e67d7 957 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
4f2a9af8 958 return true;
b5b8b0ac
AO
959 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
960 ielt.uid = DECL_UID (var);
84baa4b9 961 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
b5b8b0ac
AO
962 if (!dslot)
963 return true;
ddb555ed 964 if (gimple_debug_bind_p (stmt))
84baa4b9 965 gimple_debug_bind_set_var (stmt, dslot->to);
ddb555ed 966 else if (gimple_debug_source_bind_p (stmt))
84baa4b9 967 gimple_debug_source_bind_set_var (stmt, dslot->to);
b5b8b0ac
AO
968
969 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
970 {
971 name = USE_FROM_PTR (use);
972 if (TREE_CODE (name) != SSA_NAME)
973 continue;
974
975 elt.version = SSA_NAME_VERSION (name);
c203e8a7 976 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
b5b8b0ac
AO
977 if (!slot)
978 {
979 gimple_debug_bind_reset_value (stmt);
980 update_stmt (stmt);
981 break;
982 }
983
4a8fb1a1 984 SET_USE (use, (*slot)->new_name);
b5b8b0ac
AO
985 }
986
987 return false;
988}
989
0eb7e7aa
RL
990/* Callback for htab_traverse. Adds a field corresponding to the reduction
991 specified in SLOT. The type is passed in DATA. */
992
4a8fb1a1
LC
993int
994add_field_for_reduction (reduction_info **slot, tree type)
a509ebb5 995{
b8698a0f 996
4a8fb1a1 997 struct reduction_info *const red = *slot;
12efb1d7 998 tree var = reduc_stmt_res (red->reduc_stmt);
aa06a978
RB
999 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
1000 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
0eb7e7aa
RL
1001
1002 insert_field_into_struct (type, field);
1003
1004 red->field = field;
1005
1006 return 1;
1007}
a509ebb5 1008
5f40b3cb 1009/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 1010 described in SLOT. The type is passed in DATA. */
5f40b3cb 1011
4a8fb1a1
LC
1012int
1013add_field_for_name (name_to_copy_elt **slot, tree type)
5f40b3cb 1014{
4a8fb1a1 1015 struct name_to_copy_elt *const elt = *slot;
5f40b3cb 1016 tree name = ssa_name (elt->version);
70b5e7dc
RG
1017 tree field = build_decl (UNKNOWN_LOCATION,
1018 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1019 TREE_TYPE (name));
5f40b3cb
ZD
1020
1021 insert_field_into_struct (type, field);
1022 elt->field = field;
a509ebb5 1023
5f40b3cb
ZD
1024 return 1;
1025}
1026
b8698a0f
L
1027/* Callback for htab_traverse. A local result is the intermediate result
1028 computed by a single
fa10beec 1029 thread, or the initial value in case no iteration was executed.
b8698a0f
L
1030 This function creates a phi node reflecting these values.
1031 The phi's result will be stored in NEW_PHI field of the
1032 reduction's data structure. */
a509ebb5 1033
4a8fb1a1
LC
1034int
1035create_phi_for_local_result (reduction_info **slot, struct loop *loop)
a509ebb5 1036{
4a8fb1a1 1037 struct reduction_info *const reduc = *slot;
a509ebb5 1038 edge e;
538dd0b7 1039 gphi *new_phi;
e67d7a1e 1040 basic_block store_bb, continue_bb;
a509ebb5 1041 tree local_res;
f5045c96 1042 source_location locus;
a509ebb5 1043
b8698a0f
L
1044 /* STORE_BB is the block where the phi
1045 should be stored. It is the destination of the loop exit.
726a989a 1046 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
e67d7a1e
TV
1047 continue_bb = single_pred (loop->latch);
1048 store_bb = FALLTHRU_EDGE (continue_bb)->dest;
a509ebb5
RL
1049
1050 /* STORE_BB has two predecessors. One coming from the loop
1051 (the reduction's result is computed at the loop),
b8698a0f
L
1052 and another coming from a block preceding the loop,
1053 when no iterations
1054 are executed (the initial value should be taken). */
e67d7a1e 1055 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
a509ebb5
RL
1056 e = EDGE_PRED (store_bb, 1);
1057 else
1058 e = EDGE_PRED (store_bb, 0);
12efb1d7
TV
1059 tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1060 local_res = copy_ssa_name (lhs);
f5045c96 1061 locus = gimple_location (reduc->reduc_stmt);
a509ebb5 1062 new_phi = create_phi_node (local_res, store_bb);
9e227d60 1063 add_phi_arg (new_phi, reduc->init, e, locus);
e67d7a1e 1064 add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
a509ebb5
RL
1065 reduc->new_phi = new_phi;
1066
1067 return 1;
1068}
5f40b3cb
ZD
1069
1070struct clsn_data
1071{
1072 tree store;
1073 tree load;
1074
1075 basic_block store_bb;
1076 basic_block load_bb;
1077};
1078
a509ebb5 1079/* Callback for htab_traverse. Create an atomic instruction for the
b8698a0f 1080 reduction described in SLOT.
a509ebb5
RL
1081 DATA annotates the place in memory the atomic operation relates to,
1082 and the basic block it needs to be generated in. */
1083
4a8fb1a1
LC
1084int
1085create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
a509ebb5 1086{
4a8fb1a1 1087 struct reduction_info *const reduc = *slot;
726a989a 1088 gimple_stmt_iterator gsi;
a509ebb5 1089 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
a509ebb5
RL
1090 tree load_struct;
1091 basic_block bb;
1092 basic_block new_bb;
1093 edge e;
0f900dfa 1094 tree t, addr, ref, x;
726a989a 1095 tree tmp_load, name;
355fe088 1096 gimple *load;
a509ebb5 1097
61d9c527
TV
1098 if (reduc->reduc_addr == NULL_TREE)
1099 {
1100 load_struct = build_simple_mem_ref (clsn_data->load);
1101 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
a509ebb5 1102
61d9c527
TV
1103 addr = build_addr (t);
1104 }
1105 else
1106 {
1107 /* Set the address for the atomic store. */
1108 addr = reduc->reduc_addr;
1109
1110 /* Remove the non-atomic store '*addr = sum'. */
1111 tree res = PHI_RESULT (reduc->keep_res);
1112 use_operand_p use_p;
1113 gimple *stmt;
1114 bool single_use_p = single_imm_use (res, &use_p, &stmt);
1115 gcc_assert (single_use_p);
1116 replace_uses_by (gimple_vdef (stmt),
1117 gimple_vuse (stmt));
1118 gimple_stmt_iterator gsi = gsi_for_stmt (stmt);
1119 gsi_remove (&gsi, true);
1120 }
a509ebb5
RL
1121
1122 /* Create phi node. */
1123 bb = clsn_data->load_bb;
1124
b13c907a
RB
1125 gsi = gsi_last_bb (bb);
1126 e = split_block (bb, gsi_stmt (gsi));
a509ebb5
RL
1127 new_bb = e->dest;
1128
b731b390
JJ
1129 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1130 tmp_load = make_ssa_name (tmp_load);
726a989a 1131 load = gimple_build_omp_atomic_load (tmp_load, addr);
a509ebb5 1132 SSA_NAME_DEF_STMT (tmp_load) = load;
726a989a
RB
1133 gsi = gsi_start_bb (new_bb);
1134 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
a509ebb5
RL
1135
1136 e = split_block (new_bb, load);
1137 new_bb = e->dest;
726a989a 1138 gsi = gsi_start_bb (new_bb);
a509ebb5 1139 ref = tmp_load;
726a989a
RB
1140 x = fold_build2 (reduc->reduction_code,
1141 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1142 PHI_RESULT (reduc->new_phi));
a509ebb5 1143
726a989a
RB
1144 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1145 GSI_CONTINUE_LINKING);
a509ebb5 1146
726a989a 1147 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
a509ebb5
RL
1148 return 1;
1149}
1150
b8698a0f
L
1151/* Create the atomic operation at the join point of the threads.
1152 REDUCTION_LIST describes the reductions in the LOOP.
1153 LD_ST_DATA describes the shared data structure where
a509ebb5
RL
1154 shared data is stored in and loaded from. */
1155static void
4a8fb1a1 1156create_call_for_reduction (struct loop *loop,
c203e8a7 1157 reduction_info_table_type *reduction_list,
a509ebb5
RL
1158 struct clsn_data *ld_st_data)
1159{
c203e8a7 1160 reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
726a989a 1161 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
e67d7a1e
TV
1162 basic_block continue_bb = single_pred (loop->latch);
1163 ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
4a8fb1a1 1164 reduction_list
c203e8a7 1165 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
a509ebb5
RL
1166}
1167
ae0bce62
RL
1168/* Callback for htab_traverse. Loads the final reduction value at the
1169 join point of all threads, and inserts it in the right place. */
a509ebb5 1170
4a8fb1a1
LC
1171int
1172create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
a509ebb5 1173{
4a8fb1a1 1174 struct reduction_info *const red = *slot;
355fe088 1175 gimple *stmt;
726a989a 1176 gimple_stmt_iterator gsi;
12efb1d7 1177 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
a509ebb5 1178 tree load_struct;
ae0bce62 1179 tree name;
a509ebb5
RL
1180 tree x;
1181
79855460
TV
1182 /* If there's no exit phi, the result of the reduction is unused. */
1183 if (red->keep_res == NULL)
1184 return 1;
1185
726a989a 1186 gsi = gsi_after_labels (clsn_data->load_bb);
70f34814 1187 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5
RL
1188 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1189 NULL_TREE);
a509ebb5 1190
ae0bce62 1191 x = load_struct;
a509ebb5 1192 name = PHI_RESULT (red->keep_res);
726a989a 1193 stmt = gimple_build_assign (name, x);
a509ebb5 1194
726a989a 1195 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1196
726a989a
RB
1197 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1198 !gsi_end_p (gsi); gsi_next (&gsi))
1199 if (gsi_stmt (gsi) == red->keep_res)
1200 {
1201 remove_phi_node (&gsi, false);
1202 return 1;
1203 }
1204 gcc_unreachable ();
a509ebb5
RL
1205}
1206
b8698a0f 1207/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1208 REDUCTION_LIST describes the list of reductions that the
fa10beec 1209 loads should be generated for. */
a509ebb5 1210static void
c203e8a7 1211create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
a509ebb5
RL
1212 struct clsn_data *ld_st_data)
1213{
726a989a 1214 gimple_stmt_iterator gsi;
a509ebb5 1215 tree t;
355fe088 1216 gimple *stmt;
a509ebb5 1217
726a989a 1218 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1219 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1220 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1221
726a989a 1222 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1223
4a8fb1a1 1224 reduction_list
c203e8a7 1225 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
a509ebb5
RL
1226
1227}
1228
0eb7e7aa
RL
1229/* Callback for htab_traverse. Store the neutral value for the
1230 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1231 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1232 The reduction is specified in SLOT. The store information is
1233 passed in DATA. */
0eb7e7aa 1234
4a8fb1a1
LC
1235int
1236create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
0eb7e7aa 1237{
4a8fb1a1 1238 struct reduction_info *const red = *slot;
726a989a 1239 tree t;
355fe088 1240 gimple *stmt;
726a989a 1241 gimple_stmt_iterator gsi;
12efb1d7 1242 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
726a989a
RB
1243
1244 gsi = gsi_last_bb (clsn_data->store_bb);
1245 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1246 stmt = gimple_build_assign (t, red->initial_value);
726a989a 1247 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1248
1249 return 1;
1250}
1251
a509ebb5
RL
1252/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1253 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1254 specified in SLOT. */
1255
4a8fb1a1
LC
1256int
1257create_loads_and_stores_for_name (name_to_copy_elt **slot,
1258 struct clsn_data *clsn_data)
5f40b3cb 1259{
4a8fb1a1 1260 struct name_to_copy_elt *const elt = *slot;
726a989a 1261 tree t;
355fe088 1262 gimple *stmt;
726a989a 1263 gimple_stmt_iterator gsi;
5f40b3cb 1264 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1265 tree load_struct;
1266
726a989a
RB
1267 gsi = gsi_last_bb (clsn_data->store_bb);
1268 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1269 stmt = gimple_build_assign (t, ssa_name (elt->version));
726a989a 1270 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1271
726a989a 1272 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1273 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1274 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1275 stmt = gimple_build_assign (elt->new_name, t);
726a989a 1276 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1277
1278 return 1;
1279}
1280
1281/* Moves all the variables used in LOOP and defined outside of it (including
1282 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1283 name) to a structure created for this purpose. The code
b8698a0f 1284
5f40b3cb
ZD
1285 while (1)
1286 {
1287 use (a);
1288 use (b);
1289 }
1290
1291 is transformed this way:
1292
1293 bb0:
1294 old.a = a;
1295 old.b = b;
1296
1297 bb1:
1298 a' = new->a;
1299 b' = new->b;
1300 while (1)
1301 {
1302 use (a');
1303 use (b');
1304 }
1305
1306 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1307 pointer `new' is intentionally not initialized (the loop will be split to a
1308 separate function later, and `new' will be initialized from its arguments).
a509ebb5 1309 LD_ST_DATA holds information about the shared data structure used to pass
b8698a0f
L
1310 information among the threads. It is initialized here, and
1311 gen_parallel_loop will pass it to create_call_for_reduction that
1312 needs this information. REDUCTION_LIST describes the reductions
a509ebb5 1313 in LOOP. */
5f40b3cb
ZD
1314
1315static void
4a8fb1a1 1316separate_decls_in_region (edge entry, edge exit,
c203e8a7 1317 reduction_info_table_type *reduction_list,
b8698a0f 1318 tree *arg_struct, tree *new_arg_struct,
9f9f72aa 1319 struct clsn_data *ld_st_data)
a509ebb5 1320
5f40b3cb 1321{
9f9f72aa 1322 basic_block bb1 = split_edge (entry);
5f40b3cb 1323 basic_block bb0 = single_pred (bb1);
c203e8a7
TS
1324 name_to_copy_table_type name_copies (10);
1325 int_tree_htab_type decl_copies (10);
5f40b3cb 1326 unsigned i;
726a989a
RB
1327 tree type, type_name, nvar;
1328 gimple_stmt_iterator gsi;
5f40b3cb 1329 struct clsn_data clsn_data;
00f96dc9 1330 auto_vec<basic_block, 3> body;
9f9f72aa
AP
1331 basic_block bb;
1332 basic_block entry_bb = bb1;
1333 basic_block exit_bb = exit->dest;
b5b8b0ac 1334 bool has_debug_stmt = false;
5f40b3cb 1335
726a989a 1336 entry = single_succ_edge (entry_bb);
9f9f72aa 1337 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 1338
9771b263 1339 FOR_EACH_VEC_ELT (body, i, bb)
9f9f72aa 1340 {
b8698a0f 1341 if (bb != entry_bb && bb != exit_bb)
9f9f72aa 1342 {
726a989a
RB
1343 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1344 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
c203e8a7 1345 &name_copies, &decl_copies);
726a989a
RB
1346
1347 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
b5b8b0ac 1348 {
355fe088 1349 gimple *stmt = gsi_stmt (gsi);
b5b8b0ac
AO
1350
1351 if (is_gimple_debug (stmt))
1352 has_debug_stmt = true;
1353 else
1354 separate_decls_in_region_stmt (entry, exit, stmt,
c203e8a7 1355 &name_copies, &decl_copies);
b5b8b0ac 1356 }
9f9f72aa 1357 }
5f40b3cb 1358 }
9f9f72aa 1359
b5b8b0ac
AO
1360 /* Now process debug bind stmts. We must not create decls while
1361 processing debug stmts, so we defer their processing so as to
1362 make sure we will have debug info for as many variables as
1363 possible (all of those that were dealt with in the loop above),
1364 and discard those for which we know there's nothing we can
1365 do. */
1366 if (has_debug_stmt)
9771b263 1367 FOR_EACH_VEC_ELT (body, i, bb)
b5b8b0ac
AO
1368 if (bb != entry_bb && bb != exit_bb)
1369 {
1370 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1371 {
355fe088 1372 gimple *stmt = gsi_stmt (gsi);
b5b8b0ac 1373
ddb555ed 1374 if (is_gimple_debug (stmt))
b5b8b0ac 1375 {
c203e8a7
TS
1376 if (separate_decls_in_region_debug (stmt, &name_copies,
1377 &decl_copies))
b5b8b0ac
AO
1378 {
1379 gsi_remove (&gsi, true);
1380 continue;
1381 }
1382 }
1383
1384 gsi_next (&gsi);
1385 }
1386 }
1387
c203e8a7 1388 if (name_copies.elements () == 0 && reduction_list->elements () == 0)
5f40b3cb
ZD
1389 {
1390 /* It may happen that there is nothing to copy (if there are only
a509ebb5 1391 loop carried and external variables in the loop). */
5f40b3cb
ZD
1392 *arg_struct = NULL;
1393 *new_arg_struct = NULL;
1394 }
1395 else
1396 {
1397 /* Create the type for the structure to store the ssa names to. */
1398 type = lang_hooks.types.make_type (RECORD_TYPE);
9ff70652 1399 type_name = build_decl (UNKNOWN_LOCATION,
c2255bc4 1400 TYPE_DECL, create_tmp_var_name (".paral_data"),
5f40b3cb
ZD
1401 type);
1402 TYPE_NAME (type) = type_name;
1403
4a8fb1a1 1404 name_copies.traverse <tree, add_field_for_name> (type);
c203e8a7 1405 if (reduction_list && reduction_list->elements () > 0)
0eb7e7aa
RL
1406 {
1407 /* Create the fields for reductions. */
c203e8a7 1408 reduction_list->traverse <tree, add_field_for_reduction> (type);
0eb7e7aa 1409 }
5f40b3cb 1410 layout_type (type);
b8698a0f 1411
5f40b3cb
ZD
1412 /* Create the loads and stores. */
1413 *arg_struct = create_tmp_var (type, ".paral_data_store");
5f40b3cb 1414 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
b731b390 1415 *new_arg_struct = make_ssa_name (nvar);
5f40b3cb 1416
a509ebb5
RL
1417 ld_st_data->store = *arg_struct;
1418 ld_st_data->load = *new_arg_struct;
1419 ld_st_data->store_bb = bb0;
1420 ld_st_data->load_bb = bb1;
0eb7e7aa 1421
4a8fb1a1
LC
1422 name_copies
1423 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1424 (ld_st_data);
a509ebb5 1425
ae0bce62
RL
1426 /* Load the calculation from memory (after the join of the threads). */
1427
c203e8a7 1428 if (reduction_list && reduction_list->elements () > 0)
a509ebb5 1429 {
4a8fb1a1 1430 reduction_list
c203e8a7
TS
1431 ->traverse <struct clsn_data *, create_stores_for_reduction>
1432 (ld_st_data);
b731b390 1433 clsn_data.load = make_ssa_name (nvar);
9f9f72aa 1434 clsn_data.load_bb = exit->dest;
a509ebb5
RL
1435 clsn_data.store = ld_st_data->store;
1436 create_final_loads_for_reduction (reduction_list, &clsn_data);
1437 }
5f40b3cb 1438 }
5f40b3cb
ZD
1439}
1440
a79b7ec5 1441/* Returns true if FN was created to run in parallel. */
5f40b3cb 1442
62e0a1ed 1443bool
a79b7ec5 1444parallelized_function_p (tree fndecl)
5f40b3cb 1445{
a79b7ec5
TV
1446 cgraph_node *node = cgraph_node::get (fndecl);
1447 gcc_assert (node != NULL);
1448 return node->parallelized_function;
5f40b3cb
ZD
1449}
1450
1451/* Creates and returns an empty function that will receive the body of
1452 a parallelized loop. */
1453
1454static tree
9ff70652 1455create_loop_fn (location_t loc)
5f40b3cb
ZD
1456{
1457 char buf[100];
1458 char *tname;
1459 tree decl, type, name, t;
1460 struct function *act_cfun = cfun;
1461 static unsigned loopfn_num;
1462
5368224f 1463 loc = LOCATION_LOCUS (loc);
5f40b3cb
ZD
1464 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1465 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1466 clean_symbol_name (tname);
1467 name = get_identifier (tname);
1468 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1469
9ff70652 1470 decl = build_decl (loc, FUNCTION_DECL, name, type);
5f40b3cb
ZD
1471 TREE_STATIC (decl) = 1;
1472 TREE_USED (decl) = 1;
1473 DECL_ARTIFICIAL (decl) = 1;
1474 DECL_IGNORED_P (decl) = 0;
1475 TREE_PUBLIC (decl) = 0;
1476 DECL_UNINLINABLE (decl) = 1;
1477 DECL_EXTERNAL (decl) = 0;
1478 DECL_CONTEXT (decl) = NULL_TREE;
1479 DECL_INITIAL (decl) = make_node (BLOCK);
01771d43 1480 BLOCK_SUPERCONTEXT (DECL_INITIAL (decl)) = decl;
5f40b3cb 1481
9ff70652 1482 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
5f40b3cb
ZD
1483 DECL_ARTIFICIAL (t) = 1;
1484 DECL_IGNORED_P (t) = 1;
1485 DECL_RESULT (decl) = t;
1486
9ff70652 1487 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
5f40b3cb
ZD
1488 ptr_type_node);
1489 DECL_ARTIFICIAL (t) = 1;
1490 DECL_ARG_TYPE (t) = ptr_type_node;
1491 DECL_CONTEXT (t) = decl;
1492 TREE_USED (t) = 1;
1493 DECL_ARGUMENTS (decl) = t;
1494
182e0d71 1495 allocate_struct_function (decl, false);
5f40b3cb
ZD
1496
1497 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1498 it. */
5576d6f2 1499 set_cfun (act_cfun);
5f40b3cb
ZD
1500
1501 return decl;
1502}
1503
7c82d827
TV
1504/* Replace uses of NAME by VAL in block BB. */
1505
1506static void
1507replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1508{
355fe088 1509 gimple *use_stmt;
7c82d827
TV
1510 imm_use_iterator imm_iter;
1511
1512 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1513 {
1514 if (gimple_bb (use_stmt) != bb)
1515 continue;
1516
1517 use_operand_p use_p;
1518 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1519 SET_USE (use_p, val);
1520 }
1521}
1522
7c82d827
TV
1523/* Do transformation from:
1524
1525 <bb preheader>:
1526 ...
1527 goto <bb header>
1528
1529 <bb header>:
1530 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1531 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1532 ...
1533 use (ivtmp_a)
1534 ...
1535 sum_b = sum_a + sum_update
1536 ...
1537 if (ivtmp_a < n)
1538 goto <bb latch>;
1539 else
1540 goto <bb exit>;
1541
1542 <bb latch>:
1543 ivtmp_b = ivtmp_a + 1;
1544 goto <bb header>
1545
1546 <bb exit>:
712cb0bb 1547 sum_z = PHI <sum_b (cond[1]), ...>
7c82d827
TV
1548
1549 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1550 that's <bb header>.
1551
1552 to:
1553
1554 <bb preheader>:
1555 ...
1556 goto <bb newheader>
1557
1558 <bb header>:
1559 ivtmp_a = PHI <ivtmp_c (latch)>
1560 sum_a = PHI <sum_c (latch)>
1561 ...
1562 use (ivtmp_a)
1563 ...
1564 sum_b = sum_a + sum_update
1565 ...
1566 goto <bb latch>;
1567
1568 <bb newheader>:
1569 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1570 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1571 if (ivtmp_c < n + 1)
1572 goto <bb header>;
1573 else
712cb0bb 1574 goto <bb newexit>;
7c82d827
TV
1575
1576 <bb latch>:
1577 ivtmp_b = ivtmp_a + 1;
1578 goto <bb newheader>
1579
712cb0bb
TV
1580 <bb newexit>:
1581 sum_y = PHI <sum_c (newheader)>
1582
7c82d827 1583 <bb exit>:
712cb0bb 1584 sum_z = PHI <sum_y (newexit), ...>
7c82d827
TV
1585
1586
1587 In unified diff format:
1588
1589 <bb preheader>:
1590 ...
1591- goto <bb header>
1592+ goto <bb newheader>
1593
1594 <bb header>:
1595- ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1596- sum_a = PHI <sum_init (preheader), sum_b (latch)>
1597+ ivtmp_a = PHI <ivtmp_c (latch)>
1598+ sum_a = PHI <sum_c (latch)>
1599 ...
1600 use (ivtmp_a)
1601 ...
1602 sum_b = sum_a + sum_update
1603 ...
1604- if (ivtmp_a < n)
1605- goto <bb latch>;
1606+ goto <bb latch>;
1607+
1608+ <bb newheader>:
1609+ ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1610+ sum_c = PHI <sum_init (preheader), sum_b (latch)>
1611+ if (ivtmp_c < n + 1)
1612+ goto <bb header>;
1613 else
1614 goto <bb exit>;
1615
1616 <bb latch>:
1617 ivtmp_b = ivtmp_a + 1;
1618- goto <bb header>
1619+ goto <bb newheader>
1620
712cb0bb
TV
1621+ <bb newexit>:
1622+ sum_y = PHI <sum_c (newheader)>
1623
7c82d827 1624 <bb exit>:
712cb0bb
TV
1625- sum_z = PHI <sum_b (cond[1]), ...>
1626+ sum_z = PHI <sum_y (newexit), ...>
7c82d827
TV
1627
1628 Note: the example does not show any virtual phis, but these are handled more
1629 or less as reductions.
b8698a0f 1630
7c82d827
TV
1631
1632 Moves the exit condition of LOOP to the beginning of its header.
1633 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1634 bound. */
1635
1636static void
1637transform_to_exit_first_loop_alt (struct loop *loop,
1638 reduction_info_table_type *reduction_list,
1639 tree bound)
1640{
1641 basic_block header = loop->header;
1642 basic_block latch = loop->latch;
1643 edge exit = single_dom_exit (loop);
1644 basic_block exit_block = exit->dest;
1645 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1646 tree control = gimple_cond_lhs (cond_stmt);
1647 edge e;
1648
338392ed
TV
1649 /* Rewriting virtuals into loop-closed ssa normal form makes this
1650 transformation simpler. It also ensures that the virtuals are in
1651 loop-closed ssa normal from after the transformation, which is required by
1652 create_parallel_loop. */
1653 rewrite_virtuals_into_loop_closed_ssa (loop);
7c82d827
TV
1654
1655 /* Create the new_header block. */
1656 basic_block new_header = split_block_before_cond_jump (exit->src);
712cb0bb 1657 edge edge_at_split = single_pred_edge (new_header);
7c82d827
TV
1658
1659 /* Redirect entry edge to new_header. */
1660 edge entry = loop_preheader_edge (loop);
1661 e = redirect_edge_and_branch (entry, new_header);
1662 gcc_assert (e == entry);
1663
1664 /* Redirect post_inc_edge to new_header. */
1665 edge post_inc_edge = single_succ_edge (latch);
1666 e = redirect_edge_and_branch (post_inc_edge, new_header);
1667 gcc_assert (e == post_inc_edge);
1668
1669 /* Redirect post_cond_edge to header. */
1670 edge post_cond_edge = single_pred_edge (latch);
1671 e = redirect_edge_and_branch (post_cond_edge, header);
1672 gcc_assert (e == post_cond_edge);
1673
712cb0bb
TV
1674 /* Redirect edge_at_split to latch. */
1675 e = redirect_edge_and_branch (edge_at_split, latch);
1676 gcc_assert (e == edge_at_split);
7c82d827
TV
1677
1678 /* Set the new loop bound. */
1679 gimple_cond_set_rhs (cond_stmt, bound);
5a5fd951 1680 update_stmt (cond_stmt);
7c82d827
TV
1681
1682 /* Repair the ssa. */
1683 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1684 edge_var_map *vm;
1685 gphi_iterator gsi;
338392ed 1686 int i;
7c82d827
TV
1687 for (gsi = gsi_start_phis (header), i = 0;
1688 !gsi_end_p (gsi) && v->iterate (i, &vm);
1689 gsi_next (&gsi), i++)
1690 {
1691 gphi *phi = gsi.phi ();
1692 tree res_a = PHI_RESULT (phi);
1693
1694 /* Create new phi. */
1695 tree res_c = copy_ssa_name (res_a, phi);
1696 gphi *nphi = create_phi_node (res_c, new_header);
1697
1698 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1699 replace_uses_in_bb_by (res_a, res_c, new_header);
1700
1701 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1702 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1703
338392ed 1704 /* Replace sum_b with sum_c in exit phi. */
7c82d827 1705 tree res_b = redirect_edge_var_map_def (vm);
338392ed 1706 replace_uses_in_bb_by (res_b, res_c, exit_block);
7c82d827
TV
1707
1708 struct reduction_info *red = reduction_phi (reduction_list, phi);
1709 gcc_assert (virtual_operand_p (res_a)
1710 || res_a == control
1711 || red != NULL);
1712
1713 if (red)
1714 {
1715 /* Register the new reduction phi. */
1716 red->reduc_phi = nphi;
1717 gimple_set_uid (red->reduc_phi, red->reduc_version);
1718 }
1719 }
1720 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
7c82d827
TV
1721
1722 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1723 flush_pending_stmts (entry);
1724
1725 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1726 flush_pending_stmts (post_inc_edge);
1727
d42ba2d2
TV
1728
1729 basic_block new_exit_block = NULL;
1730 if (!single_pred_p (exit->dest))
1731 {
1732 /* Create a new empty exit block, inbetween the new loop header and the
1733 old exit block. The function separate_decls_in_region needs this block
1734 to insert code that is active on loop exit, but not any other path. */
1735 new_exit_block = split_edge (exit);
1736 }
712cb0bb
TV
1737
1738 /* Insert and register the reduction exit phis. */
7c82d827
TV
1739 for (gphi_iterator gsi = gsi_start_phis (exit_block);
1740 !gsi_end_p (gsi);
1741 gsi_next (&gsi))
1742 {
1743 gphi *phi = gsi.phi ();
d42ba2d2 1744 gphi *nphi = NULL;
7c82d827 1745 tree res_z = PHI_RESULT (phi);
d42ba2d2 1746 tree res_c;
712cb0bb 1747
d42ba2d2
TV
1748 if (new_exit_block != NULL)
1749 {
1750 /* Now that we have a new exit block, duplicate the phi of the old
1751 exit block in the new exit block to preserve loop-closed ssa. */
1752 edge succ_new_exit_block = single_succ_edge (new_exit_block);
1753 edge pred_new_exit_block = single_pred_edge (new_exit_block);
1754 tree res_y = copy_ssa_name (res_z, phi);
1755 nphi = create_phi_node (res_y, new_exit_block);
1756 res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1757 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1758 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1759 }
1760 else
1761 res_c = PHI_ARG_DEF_FROM_EDGE (phi, exit);
712cb0bb 1762
7c82d827
TV
1763 if (virtual_operand_p (res_z))
1764 continue;
1765
355fe088 1766 gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
7c82d827
TV
1767 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1768 if (red != NULL)
d42ba2d2
TV
1769 red->keep_res = (nphi != NULL
1770 ? nphi
1771 : phi);
7c82d827
TV
1772 }
1773
1774 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1775 then we're still using some fields, so only bother about fields that are
1776 still used: header and latch.
1777 The loop has a new header bb, so we update it. The latch bb stays the
1778 same. */
1779 loop->header = new_header;
1780
1781 /* Recalculate dominance info. */
1782 free_dominance_info (CDI_DOMINATORS);
1783 calculate_dominance_info (CDI_DOMINATORS);
4a4b6c4c
TV
1784
1785 checking_verify_ssa (true, true);
7c82d827
TV
1786}
1787
1788/* Tries to moves the exit condition of LOOP to the beginning of its header
1789 without duplication of the loop body. NIT is the number of iterations of the
1790 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1791 transformation is successful. */
1792
1793static bool
1794try_transform_to_exit_first_loop_alt (struct loop *loop,
1795 reduction_info_table_type *reduction_list,
1796 tree nit)
1797{
1798 /* Check whether the latch contains a single statement. */
1b7f61eb
TV
1799 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1800 return false;
7c82d827 1801
d95167ee
TV
1802 /* Check whether the latch contains no phis. */
1803 if (phi_nodes (loop->latch) != NULL)
1804 return false;
1805
7c82d827
TV
1806 /* Check whether the latch contains the loop iv increment. */
1807 edge back = single_succ_edge (loop->latch);
1808 edge exit = single_dom_exit (loop);
1809 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1810 tree control = gimple_cond_lhs (cond_stmt);
1811 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1812 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1813 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1814 return false;
1815
1816 /* Check whether there's no code between the loop condition and the latch. */
1817 if (!single_pred_p (loop->latch)
1818 || single_pred (loop->latch) != exit->src)
1819 return false;
1820
1821 tree alt_bound = NULL_TREE;
1822 tree nit_type = TREE_TYPE (nit);
1823
1824 /* Figure out whether nit + 1 overflows. */
1825 if (TREE_CODE (nit) == INTEGER_CST)
1826 {
ff22eb12 1827 if (!tree_int_cst_equal (nit, TYPE_MAX_VALUE (nit_type)))
7c82d827
TV
1828 {
1829 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1830 nit, build_one_cst (nit_type));
1831
1832 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
fd7b3ef5
TV
1833 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1834 return true;
7c82d827
TV
1835 }
1836 else
1837 {
1838 /* Todo: Figure out if we can trigger this, if it's worth to handle
1839 optimally, and if we can handle it optimally. */
fd7b3ef5 1840 return false;
7c82d827
TV
1841 }
1842 }
7c82d827 1843
fd7b3ef5 1844 gcc_assert (TREE_CODE (nit) == SSA_NAME);
7c82d827 1845
4f75d608
TV
1846 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1847 iv with base 0 and step 1 that is incremented in the latch, like this:
1848
1849 <bb header>:
1850 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1851 ...
1852 if (iv_1 < nit)
1853 goto <bb latch>;
1854 else
1855 goto <bb exit>;
1856
1857 <bb latch>:
1858 iv_2 = iv_1 + 1;
1859 goto <bb header>;
1860
1861 The range of iv_1 is [0, nit]. The latch edge is taken for
1862 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
1863 number of latch executions is equal to nit.
1864
1865 The function max_loop_iterations gives us the maximum number of latch
1866 executions, so it gives us the maximum value of nit. */
1867 widest_int nit_max;
1868 if (!max_loop_iterations (loop, &nit_max))
1869 return false;
1870
1871 /* Check if nit + 1 overflows. */
ff22eb12 1872 widest_int type_max = wi::to_widest (TYPE_MAX_VALUE (nit_type));
032c80e9 1873 if (nit_max >= type_max)
4f75d608
TV
1874 return false;
1875
355fe088 1876 gimple *def = SSA_NAME_DEF_STMT (nit);
7c82d827 1877
4f75d608 1878 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
fd7b3ef5
TV
1879 if (def
1880 && is_gimple_assign (def)
1881 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1882 {
1883 tree op1 = gimple_assign_rhs1 (def);
1884 tree op2 = gimple_assign_rhs2 (def);
1885 if (integer_minus_onep (op1))
1886 alt_bound = op2;
1887 else if (integer_minus_onep (op2))
1888 alt_bound = op1;
7c82d827
TV
1889 }
1890
9f620bf1 1891 /* If not found, insert nit + 1. */
7c82d827 1892 if (alt_bound == NULL_TREE)
9f620bf1
TV
1893 {
1894 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
1895 build_int_cst_type (nit_type, 1));
1896
1897 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1898
1899 alt_bound
1900 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
1901 GSI_CONTINUE_LINKING);
1902 }
7c82d827
TV
1903
1904 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1905 return true;
1906}
1907
1908/* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1909 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1910 LOOP. */
5f40b3cb
ZD
1911
1912static void
4a8fb1a1 1913transform_to_exit_first_loop (struct loop *loop,
c203e8a7 1914 reduction_info_table_type *reduction_list,
4a8fb1a1 1915 tree nit)
5f40b3cb
ZD
1916{
1917 basic_block *bbs, *nbbs, ex_bb, orig_header;
1918 unsigned n;
1919 bool ok;
1920 edge exit = single_dom_exit (loop), hpred;
726a989a 1921 tree control, control_name, res, t;
538dd0b7
DM
1922 gphi *phi, *nphi;
1923 gassign *stmt;
1924 gcond *cond_stmt, *cond_nit;
48710229 1925 tree nit_1;
5f40b3cb
ZD
1926
1927 split_block_after_labels (loop->header);
1928 orig_header = single_succ (loop->header);
1929 hpred = single_succ_edge (loop->header);
1930
538dd0b7 1931 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
726a989a
RB
1932 control = gimple_cond_lhs (cond_stmt);
1933 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
5f40b3cb
ZD
1934
1935 /* Make sure that we have phi nodes on exit for all loop header phis
1936 (create_parallel_loop requires that). */
538dd0b7
DM
1937 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1938 !gsi_end_p (gsi);
1939 gsi_next (&gsi))
5f40b3cb 1940 {
538dd0b7 1941 phi = gsi.phi ();
5f40b3cb 1942 res = PHI_RESULT (phi);
070ecdfd 1943 t = copy_ssa_name (res, phi);
5f40b3cb 1944 SET_PHI_RESULT (phi, t);
5f40b3cb 1945 nphi = create_phi_node (res, orig_header);
9e227d60 1946 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
5f40b3cb
ZD
1947
1948 if (res == control)
1949 {
726a989a 1950 gimple_cond_set_lhs (cond_stmt, t);
5f40b3cb
ZD
1951 update_stmt (cond_stmt);
1952 control = t;
1953 }
1954 }
12037899 1955
5f40b3cb 1956 bbs = get_loop_body_in_dom_order (loop);
48710229 1957
69958396
RL
1958 for (n = 0; bbs[n] != exit->src; n++)
1959 continue;
5f40b3cb 1960 nbbs = XNEWVEC (basic_block, n);
726a989a
RB
1961 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1962 bbs + 1, n, nbbs);
5f40b3cb
ZD
1963 gcc_assert (ok);
1964 free (bbs);
1965 ex_bb = nbbs[0];
1966 free (nbbs);
1967
b8698a0f 1968 /* Other than reductions, the only gimple reg that should be copied
726a989a 1969 out of the loop is the control variable. */
69958396 1970 exit = single_dom_exit (loop);
5f40b3cb 1971 control_name = NULL_TREE;
538dd0b7
DM
1972 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1973 !gsi_end_p (gsi); )
5f40b3cb 1974 {
538dd0b7 1975 phi = gsi.phi ();
5f40b3cb 1976 res = PHI_RESULT (phi);
ea057359 1977 if (virtual_operand_p (res))
726a989a
RB
1978 {
1979 gsi_next (&gsi);
1980 continue;
1981 }
5f40b3cb 1982
a509ebb5 1983 /* Check if it is a part of reduction. If it is,
b8698a0f
L
1984 keep the phi at the reduction's keep_res field. The
1985 PHI_RESULT of this phi is the resulting value of the reduction
a509ebb5
RL
1986 variable when exiting the loop. */
1987
c203e8a7 1988 if (reduction_list->elements () > 0)
a509ebb5
RL
1989 {
1990 struct reduction_info *red;
1991
1992 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
a509ebb5
RL
1993 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1994 if (red)
726a989a
RB
1995 {
1996 red->keep_res = phi;
1997 gsi_next (&gsi);
1998 continue;
1999 }
a509ebb5 2000 }
726a989a
RB
2001 gcc_assert (control_name == NULL_TREE
2002 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb 2003 control_name = res;
726a989a 2004 remove_phi_node (&gsi, false);
5f40b3cb
ZD
2005 }
2006 gcc_assert (control_name != NULL_TREE);
5f40b3cb 2007
b8698a0f 2008 /* Initialize the control variable to number of iterations
48710229 2009 according to the rhs of the exit condition. */
538dd0b7
DM
2010 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
2011 cond_nit = as_a <gcond *> (last_stmt (exit->src));
48710229
RL
2012 nit_1 = gimple_cond_rhs (cond_nit);
2013 nit_1 = force_gimple_operand_gsi (&gsi,
2014 fold_convert (TREE_TYPE (control_name), nit_1),
726a989a 2015 false, NULL_TREE, false, GSI_SAME_STMT);
48710229 2016 stmt = gimple_build_assign (control_name, nit_1);
726a989a 2017 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
2018}
2019
2020/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
726a989a 2021 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
5f40b3cb 2022 NEW_DATA is the variable that should be initialized from the argument
f99c3557
TS
2023 of LOOP_FN. N_THREADS is the requested number of threads, which can be 0 if
2024 that number is to be determined later. */
5f40b3cb 2025
a6f4d493 2026static void
5f40b3cb 2027create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
61d9c527
TV
2028 tree new_data, unsigned n_threads, location_t loc,
2029 bool oacc_kernels_p)
5f40b3cb 2030{
726a989a 2031 gimple_stmt_iterator gsi;
61d9c527 2032 basic_block for_bb, ex_bb, continue_bb;
0f900dfa 2033 tree t, param;
538dd0b7 2034 gomp_parallel *omp_par_stmt;
355fe088
TS
2035 gimple *omp_return_stmt1, *omp_return_stmt2;
2036 gimple *phi;
538dd0b7
DM
2037 gcond *cond_stmt;
2038 gomp_for *for_stmt;
2039 gomp_continue *omp_cont_stmt;
726a989a 2040 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
5f40b3cb
ZD
2041 edge exit, nexit, guard, end, e;
2042
61d9c527
TV
2043 if (oacc_kernels_p)
2044 {
25651634
TS
2045 gcc_checking_assert (lookup_attribute ("oacc kernels",
2046 DECL_ATTRIBUTES (cfun->decl)));
b0f271ce
TS
2047 /* Indicate to later processing that this is a parallelized OpenACC
2048 kernels construct. */
2049 DECL_ATTRIBUTES (cfun->decl)
2050 = tree_cons (get_identifier ("oacc kernels parallelized"),
2051 NULL_TREE, DECL_ATTRIBUTES (cfun->decl));
61d9c527
TV
2052 }
2053 else
2054 {
b0f271ce
TS
2055 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
2056
61d9c527
TV
2057 basic_block bb = loop_preheader_edge (loop)->src;
2058 basic_block paral_bb = single_pred (bb);
2059 gsi = gsi_last_bb (paral_bb);
5f40b3cb 2060
f99c3557 2061 gcc_checking_assert (n_threads != 0);
61d9c527
TV
2062 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
2063 OMP_CLAUSE_NUM_THREADS_EXPR (t)
2064 = build_int_cst (integer_type_node, n_threads);
2065 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2066 gimple_set_location (omp_par_stmt, loc);
5f40b3cb 2067
61d9c527 2068 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
5f40b3cb 2069
61d9c527
TV
2070 /* Initialize NEW_DATA. */
2071 if (data)
2072 {
2073 gassign *assign_stmt;
538dd0b7 2074
61d9c527 2075 gsi = gsi_after_labels (bb);
726a989a 2076
61d9c527
TV
2077 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
2078 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2079 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
726a989a 2080
61d9c527
TV
2081 assign_stmt = gimple_build_assign (new_data,
2082 fold_convert (TREE_TYPE (new_data), param));
2083 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
2084 }
5f40b3cb 2085
61d9c527
TV
2086 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
2087 bb = split_loop_exit_edge (single_dom_exit (loop));
2088 gsi = gsi_last_bb (bb);
2089 omp_return_stmt1 = gimple_build_omp_return (false);
2090 gimple_set_location (omp_return_stmt1, loc);
2091 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
2092 }
5f40b3cb 2093
726a989a 2094 /* Extract data for GIMPLE_OMP_FOR. */
5f40b3cb 2095 gcc_assert (loop->header == single_dom_exit (loop)->src);
538dd0b7 2096 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
5f40b3cb 2097
726a989a 2098 cvar = gimple_cond_lhs (cond_stmt);
5f40b3cb
ZD
2099 cvar_base = SSA_NAME_VAR (cvar);
2100 phi = SSA_NAME_DEF_STMT (cvar);
2101 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
b731b390 2102 initvar = copy_ssa_name (cvar);
5f40b3cb
ZD
2103 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2104 initvar);
2105 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2106
1dff453d 2107 gsi = gsi_last_nondebug_bb (loop->latch);
726a989a
RB
2108 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2109 gsi_remove (&gsi, true);
5f40b3cb
ZD
2110
2111 /* Prepare cfg. */
2112 for_bb = split_edge (loop_preheader_edge (loop));
2113 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2114 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2115 gcc_assert (exit == single_dom_exit (loop));
2116
2117 guard = make_edge (for_bb, ex_bb, 0);
357067f2
JH
2118 /* FIXME: What is the probability? */
2119 guard->probability = profile_probability::guessed_never ();
e67d7a1e
TV
2120 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2121 loop->latch = split_edge (single_succ_edge (loop->latch));
2122 single_pred_edge (loop->latch)->flags = 0;
357067f2 2123 end = make_single_succ_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
e67d7a1e
TV
2124 rescan_loop_exit (end, true, false);
2125
538dd0b7
DM
2126 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2127 !gsi_end_p (gpi); gsi_next (&gpi))
5f40b3cb 2128 {
f5045c96 2129 source_location locus;
538dd0b7 2130 gphi *phi = gpi.phi ();
7781d262 2131 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
355fe088 2132 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
538dd0b7 2133
7781d262
TV
2134 /* If the exit phi is not connected to a header phi in the same loop, this
2135 value is not modified in the loop, and we're done with this phi. */
2136 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2137 && gimple_bb (def_stmt) == loop->header))
1c5211b1
TV
2138 {
2139 locus = gimple_phi_arg_location_from_edge (phi, exit);
2140 add_phi_arg (phi, def, guard, locus);
2141 add_phi_arg (phi, def, end, locus);
2142 continue;
2143 }
f5045c96 2144
7781d262 2145 gphi *stmt = as_a <gphi *> (def_stmt);
f5045c96 2146 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
b8698a0f 2147 locus = gimple_phi_arg_location_from_edge (stmt,
f5045c96 2148 loop_preheader_edge (loop));
9e227d60 2149 add_phi_arg (phi, def, guard, locus);
f5045c96
AM
2150
2151 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2152 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
9e227d60 2153 add_phi_arg (phi, def, end, locus);
5f40b3cb
ZD
2154 }
2155 e = redirect_edge_and_branch (exit, nexit->dest);
2156 PENDING_STMT (e) = NULL;
2157
726a989a 2158 /* Emit GIMPLE_OMP_FOR. */
61d9c527 2159 if (oacc_kernels_p)
b0f271ce
TS
2160 /* Parallelized OpenACC kernels constructs use gang parallelism. See also
2161 omp-offload.c:execute_oacc_device_lower. */
61d9c527
TV
2162 t = build_omp_clause (loc, OMP_CLAUSE_GANG);
2163 else
1f600fea 2164 {
61d9c527
TV
2165 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
2166 int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
2167 enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type \
2168 = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
2169 switch (schedule_type)
2170 {
2171 case PARAM_PARLOOPS_SCHEDULE_KIND_static:
2172 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2173 break;
2174 case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
2175 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2176 break;
2177 case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
2178 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2179 break;
2180 case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
2181 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2182 chunk_size = 0;
2183 break;
2184 case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
2185 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2186 chunk_size = 0;
2187 break;
2188 default:
2189 gcc_unreachable ();
2190 }
2191 if (chunk_size != 0)
2192 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2193 = build_int_cst (integer_type_node, chunk_size);
1f600fea 2194 }
5f40b3cb 2195
61d9c527
TV
2196 for_stmt = gimple_build_omp_for (NULL,
2197 (oacc_kernels_p
2198 ? GF_OMP_FOR_KIND_OACC_LOOP
2199 : GF_OMP_FOR_KIND_FOR),
2200 t, 1, NULL);
2201
2202 gimple_cond_set_lhs (cond_stmt, cvar_base);
2203 type = TREE_TYPE (cvar);
9ff70652 2204 gimple_set_location (for_stmt, loc);
726a989a
RB
2205 gimple_omp_for_set_index (for_stmt, 0, initvar);
2206 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2207 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2208 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2209 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2210 cvar_base,
2211 build_int_cst (type, 1)));
2212
2213 gsi = gsi_last_bb (for_bb);
2214 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
2215 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2216
726a989a 2217 /* Emit GIMPLE_OMP_CONTINUE. */
e67d7a1e
TV
2218 continue_bb = single_pred (loop->latch);
2219 gsi = gsi_last_bb (continue_bb);
538dd0b7
DM
2220 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2221 gimple_set_location (omp_cont_stmt, loc);
2222 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2223 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
5f40b3cb 2224
726a989a
RB
2225 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2226 gsi = gsi_last_bb (ex_bb);
538dd0b7
DM
2227 omp_return_stmt2 = gimple_build_omp_return (true);
2228 gimple_set_location (omp_return_stmt2, loc);
2229 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
5f40b3cb 2230
cd7d9fd7
RG
2231 /* After the above dom info is hosed. Re-compute it. */
2232 free_dominance_info (CDI_DOMINATORS);
2233 calculate_dominance_info (CDI_DOMINATORS);
5f40b3cb
ZD
2234}
2235
08dab97a 2236/* Generates code to execute the iterations of LOOP in N_THREADS
f99c3557
TS
2237 threads in parallel, which can be 0 if that number is to be determined
2238 later.
08dab97a
RL
2239
2240 NITER describes number of iterations of LOOP.
fa10beec 2241 REDUCTION_LIST describes the reductions existent in the LOOP. */
5f40b3cb
ZD
2242
2243static void
c203e8a7
TS
2244gen_parallel_loop (struct loop *loop,
2245 reduction_info_table_type *reduction_list,
61d9c527
TV
2246 unsigned n_threads, struct tree_niter_desc *niter,
2247 bool oacc_kernels_p)
5f40b3cb 2248{
5f40b3cb 2249 tree many_iterations_cond, type, nit;
726a989a
RB
2250 tree arg_struct, new_arg_struct;
2251 gimple_seq stmts;
9f9f72aa 2252 edge entry, exit;
a509ebb5 2253 struct clsn_data clsn_data;
9ff70652 2254 location_t loc;
355fe088 2255 gimple *cond_stmt;
768da0da 2256 unsigned int m_p_thread=2;
5f40b3cb
ZD
2257
2258 /* From
2259
2260 ---------------------------------------------------------------------
2261 loop
2262 {
2263 IV = phi (INIT, IV + STEP)
2264 BODY1;
2265 if (COND)
2266 break;
2267 BODY2;
2268 }
2269 ---------------------------------------------------------------------
2270
2271 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2272 we generate the following code:
2273
2274 ---------------------------------------------------------------------
2275
2276 if (MAY_BE_ZERO
a509ebb5
RL
2277 || NITER < MIN_PER_THREAD * N_THREADS)
2278 goto original;
5f40b3cb
ZD
2279
2280 BODY1;
2281 store all local loop-invariant variables used in body of the loop to DATA.
726a989a 2282 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
5f40b3cb 2283 load the variables from DATA.
726a989a 2284 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
5f40b3cb
ZD
2285 BODY2;
2286 BODY1;
726a989a
RB
2287 GIMPLE_OMP_CONTINUE;
2288 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2289 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
5f40b3cb
ZD
2290 goto end;
2291
2292 original:
2293 loop
2294 {
2295 IV = phi (INIT, IV + STEP)
2296 BODY1;
2297 if (COND)
2298 break;
2299 BODY2;
2300 }
2301
2302 end:
2303
2304 */
2305
2306 /* Create two versions of the loop -- in the old one, we know that the
2307 number of iterations is large enough, and we will transform it into the
2308 loop that will be split to loop_fn, the new one will be used for the
2309 remaining iterations. */
a509ebb5 2310
768da0da
RL
2311 /* We should compute a better number-of-iterations value for outer loops.
2312 That is, if we have
2313
2314 for (i = 0; i < n; ++i)
2315 for (j = 0; j < m; ++j)
2316 ...
2317
2318 we should compute nit = n * m, not nit = n.
2319 Also may_be_zero handling would need to be adjusted. */
2320
5f40b3cb
ZD
2321 type = TREE_TYPE (niter->niter);
2322 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2323 NULL_TREE);
2324 if (stmts)
726a989a 2325 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb 2326
61d9c527 2327 if (!oacc_kernels_p)
5f40b3cb 2328 {
61d9c527
TV
2329 if (loop->inner)
2330 m_p_thread=2;
2331 else
2332 m_p_thread=MIN_PER_THREAD;
2333
f99c3557 2334 gcc_checking_assert (n_threads != 0);
61d9c527
TV
2335 many_iterations_cond =
2336 fold_build2 (GE_EXPR, boolean_type_node,
2337 nit, build_int_cst (type, m_p_thread * n_threads));
2338
5f40b3cb 2339 many_iterations_cond
61d9c527
TV
2340 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2341 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2342 many_iterations_cond);
2343 many_iterations_cond
2344 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
5f40b3cb 2345 if (stmts)
726a989a 2346 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
61d9c527
TV
2347 if (!is_gimple_condexpr (many_iterations_cond))
2348 {
2349 many_iterations_cond
2350 = force_gimple_operand (many_iterations_cond, &stmts,
2351 true, NULL_TREE);
2352 if (stmts)
2353 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop),
2354 stmts);
2355 }
5f40b3cb 2356
61d9c527 2357 initialize_original_copy_tables ();
5f40b3cb 2358
61d9c527 2359 /* We assume that the loop usually iterates a lot. */
61d9c527 2360 loop_version (loop, many_iterations_cond, NULL,
af2bbc51
JH
2361 profile_probability::likely (),
2362 profile_probability::unlikely (),
2363 profile_probability::likely (),
2364 profile_probability::unlikely (), true);
61d9c527
TV
2365 update_ssa (TODO_update_ssa);
2366 free_original_copy_tables ();
2367 }
5f40b3cb
ZD
2368
2369 /* Base all the induction variables in LOOP on a single control one. */
c80a5403 2370 canonicalize_loop_ivs (loop, &nit, true);
5f40b3cb 2371
7c82d827
TV
2372 /* Ensure that the exit condition is the first statement in the loop.
2373 The common case is that latch of the loop is empty (apart from the
2374 increment) and immediately follows the loop exit test. Attempt to move the
2375 entry of the loop directly before the exit check and increase the number of
2376 iterations of the loop by one. */
a5a57bf3
TV
2377 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2378 {
2379 if (dump_file
2380 && (dump_flags & TDF_DETAILS))
2381 fprintf (dump_file,
2382 "alternative exit-first loop transform succeeded"
2383 " for loop %d\n", loop->num);
2384 }
2385 else
7c82d827 2386 {
61d9c527
TV
2387 if (oacc_kernels_p)
2388 n_threads = 1;
2389
7c82d827
TV
2390 /* Fall back on the method that handles more cases, but duplicates the
2391 loop body: move the exit condition of LOOP to the beginning of its
2392 header, and duplicate the part of the last iteration that gets disabled
2393 to the exit of the loop. */
2394 transform_to_exit_first_loop (loop, reduction_list, nit);
2395 }
a509ebb5 2396
fa10beec 2397 /* Generate initializations for reductions. */
c203e8a7
TS
2398 if (reduction_list->elements () > 0)
2399 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
5f40b3cb
ZD
2400
2401 /* Eliminate the references to local variables from the loop. */
9f9f72aa
AP
2402 gcc_assert (single_exit (loop));
2403 entry = loop_preheader_edge (loop);
2404 exit = single_dom_exit (loop);
5f40b3cb 2405
61d9c527
TV
2406 /* This rewrites the body in terms of new variables. This has already
2407 been done for oacc_kernels_p in pass_lower_omp/lower_omp (). */
2408 if (!oacc_kernels_p)
2409 {
2410 eliminate_local_variables (entry, exit);
2411 /* In the old loop, move all variables non-local to the loop to a
2412 structure and back, and create separate decls for the variables used in
2413 loop. */
2414 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
2415 &new_arg_struct, &clsn_data);
2416 }
2417 else
2418 {
2419 arg_struct = NULL_TREE;
2420 new_arg_struct = NULL_TREE;
2421 clsn_data.load = NULL_TREE;
2422 clsn_data.load_bb = exit->dest;
2423 clsn_data.store = NULL_TREE;
2424 clsn_data.store_bb = NULL;
2425 }
5f40b3cb
ZD
2426
2427 /* Create the parallel constructs. */
9ff70652
JJ
2428 loc = UNKNOWN_LOCATION;
2429 cond_stmt = last_stmt (loop->header);
2430 if (cond_stmt)
2431 loc = gimple_location (cond_stmt);
61d9c527
TV
2432 create_parallel_loop (loop, create_loop_fn (loc), arg_struct, new_arg_struct,
2433 n_threads, loc, oacc_kernels_p);
c203e8a7 2434 if (reduction_list->elements () > 0)
a509ebb5 2435 create_call_for_reduction (loop, reduction_list, &clsn_data);
5f40b3cb
ZD
2436
2437 scev_reset ();
2438
92a6bdbd
SP
2439 /* Free loop bound estimations that could contain references to
2440 removed statements. */
adb7eaa2 2441 free_numbers_of_iterations_estimates (cfun);
5f40b3cb
ZD
2442}
2443
9857228c
SP
2444/* Returns true when LOOP contains vector phi nodes. */
2445
2446static bool
726a989a 2447loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
2448{
2449 unsigned i;
2450 basic_block *bbs = get_loop_body_in_dom_order (loop);
538dd0b7 2451 gphi_iterator gsi;
9857228c 2452 bool res = true;
9857228c
SP
2453
2454 for (i = 0; i < loop->num_nodes; i++)
726a989a 2455 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
538dd0b7 2456 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
9857228c
SP
2457 goto end;
2458
2459 res = false;
2460 end:
2461 free (bbs);
2462 return res;
2463}
2464
08dab97a
RL
2465/* Create a reduction_info struct, initialize it with REDUC_STMT
2466 and PHI, insert it to the REDUCTION_LIST. */
2467
2468static void
c203e8a7 2469build_new_reduction (reduction_info_table_type *reduction_list,
355fe088 2470 gimple *reduc_stmt, gphi *phi)
08dab97a 2471{
4a8fb1a1 2472 reduction_info **slot;
08dab97a 2473 struct reduction_info *new_reduction;
12efb1d7 2474 enum tree_code reduction_code;
08dab97a
RL
2475
2476 gcc_assert (reduc_stmt);
b8698a0f 2477
08dab97a
RL
2478 if (dump_file && (dump_flags & TDF_DETAILS))
2479 {
2480 fprintf (dump_file,
430002b9 2481 "Detected reduction. reduction stmt is:\n");
ef6cb4c7 2482 print_gimple_stmt (dump_file, reduc_stmt, 0);
08dab97a
RL
2483 fprintf (dump_file, "\n");
2484 }
b8698a0f 2485
12efb1d7
TV
2486 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
2487 {
2488 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
355fe088 2489 gimple *def1 = SSA_NAME_DEF_STMT (op1);
12efb1d7
TV
2490 reduction_code = gimple_assign_rhs_code (def1);
2491 }
2492
2493 else
2494 reduction_code = gimple_assign_rhs_code (reduc_stmt);
2495
08dab97a 2496 new_reduction = XCNEW (struct reduction_info);
b8698a0f 2497
08dab97a
RL
2498 new_reduction->reduc_stmt = reduc_stmt;
2499 new_reduction->reduc_phi = phi;
5d1fd1de 2500 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
12efb1d7 2501 new_reduction->reduction_code = reduction_code;
c203e8a7 2502 slot = reduction_list->find_slot (new_reduction, INSERT);
08dab97a
RL
2503 *slot = new_reduction;
2504}
2505
5d1fd1de
JJ
2506/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2507
4a8fb1a1
LC
2508int
2509set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 2510{
4a8fb1a1 2511 struct reduction_info *const red = *slot;
5d1fd1de
JJ
2512 gimple_set_uid (red->reduc_phi, red->reduc_version);
2513 return 1;
2514}
2515
08dab97a
RL
2516/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2517
2518static void
c203e8a7 2519gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
08dab97a 2520{
538dd0b7 2521 gphi_iterator gsi;
08dab97a 2522 loop_vec_info simple_loop_info;
846b1a18
JJ
2523 auto_vec<gphi *, 4> double_reduc_phis;
2524 auto_vec<gimple *, 4> double_reduc_stmts;
08dab97a 2525
fdce493d
TV
2526 if (!stmt_vec_info_vec.exists ())
2527 init_stmt_vec_info_vec ();
2528
08dab97a 2529 simple_loop_info = vect_analyze_loop_form (loop);
1e6a7b01 2530 if (simple_loop_info == NULL)
1cabb204 2531 goto gather_done;
08dab97a
RL
2532
2533 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2534 {
538dd0b7 2535 gphi *phi = gsi.phi ();
08dab97a
RL
2536 affine_iv iv;
2537 tree res = PHI_RESULT (phi);
2538 bool double_reduc;
2539
ea057359 2540 if (virtual_operand_p (res))
08dab97a
RL
2541 continue;
2542
1e6a7b01
TV
2543 if (simple_iv (loop, loop, res, &iv, true))
2544 continue;
2545
355fe088 2546 gimple *reduc_stmt
1a58f770 2547 = vect_force_simple_reduction (simple_loop_info, phi,
1e6a7b01 2548 &double_reduc, true);
12efb1d7 2549 if (!reduc_stmt)
1e6a7b01
TV
2550 continue;
2551
12efb1d7
TV
2552 if (double_reduc)
2553 {
846b1a18 2554 if (loop->inner->inner != NULL)
12efb1d7
TV
2555 continue;
2556
846b1a18
JJ
2557 double_reduc_phis.safe_push (phi);
2558 double_reduc_stmts.safe_push (reduc_stmt);
2559 continue;
12efb1d7
TV
2560 }
2561
1e6a7b01 2562 build_new_reduction (reduction_list, reduc_stmt, phi);
08dab97a 2563 }
5d1fd1de 2564 destroy_loop_vec_info (simple_loop_info, true);
846b1a18
JJ
2565
2566 if (!double_reduc_phis.is_empty ())
2567 {
2568 simple_loop_info = vect_analyze_loop_form (loop->inner);
2569 if (simple_loop_info)
2570 {
2571 gphi *phi;
2572 unsigned int i;
2573
2574 FOR_EACH_VEC_ELT (double_reduc_phis, i, phi)
2575 {
2576 affine_iv iv;
2577 tree res = PHI_RESULT (phi);
2578 bool double_reduc;
2579
2580 use_operand_p use_p;
2581 gimple *inner_stmt;
2582 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
2583 gcc_assert (single_use_p);
2584 if (gimple_code (inner_stmt) != GIMPLE_PHI)
2585 continue;
2586 gphi *inner_phi = as_a <gphi *> (inner_stmt);
2587 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
2588 &iv, true))
2589 continue;
2590
2591 gimple *inner_reduc_stmt
2592 = vect_force_simple_reduction (simple_loop_info, inner_phi,
1a58f770 2593 &double_reduc, true);
846b1a18
JJ
2594 gcc_assert (!double_reduc);
2595 if (inner_reduc_stmt == NULL)
2596 continue;
2597
2598 build_new_reduction (reduction_list, double_reduc_stmts[i], phi);
2599 }
2600 destroy_loop_vec_info (simple_loop_info, true);
2601 }
2602 }
5d1fd1de 2603
1cabb204 2604 gather_done:
fdce493d
TV
2605 /* Release the claim on gimple_uid. */
2606 free_stmt_vec_info_vec ();
2607
1cabb204
TV
2608 if (reduction_list->elements () == 0)
2609 return;
2610
5d1fd1de 2611 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
fdce493d
TV
2612 and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only
2613 now. */
2614 basic_block bb;
2615 FOR_EACH_BB_FN (bb, cfun)
2616 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
2617 gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
c203e8a7 2618 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
08dab97a
RL
2619}
2620
2621/* Try to initialize NITER for code generation part. */
2622
2623static bool
2624try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2625{
2626 edge exit = single_dom_exit (loop);
2627
2628 gcc_assert (exit);
2629
2630 /* We need to know # of iterations, and there should be no uses of values
2631 defined inside loop outside of it, unless the values are invariants of
2632 the loop. */
2633 if (!number_of_iterations_exit (loop, exit, niter, false))
2634 {
2635 if (dump_file && (dump_flags & TDF_DETAILS))
2636 fprintf (dump_file, " FAILED: number of iterations not known\n");
2637 return false;
2638 }
2639
2640 return true;
2641}
2642
61d9c527
TV
2643/* Return the default def of the first function argument. */
2644
2645static tree
2646get_omp_data_i_param (void)
2647{
2648 tree decl = DECL_ARGUMENTS (cfun->decl);
2649 gcc_assert (DECL_CHAIN (decl) == NULL_TREE);
2650 return ssa_default_def (cfun, decl);
2651}
2652
2653/* For PHI in loop header of LOOP, look for pattern:
2654
2655 <bb preheader>
2656 .omp_data_i = &.omp_data_arr;
2657 addr = .omp_data_i->sum;
2658 sum_a = *addr;
2659
2660 <bb header>:
2661 sum_b = PHI <sum_a (preheader), sum_c (latch)>
2662
2663 and return addr. Otherwise, return NULL_TREE. */
2664
2665static tree
2666find_reduc_addr (struct loop *loop, gphi *phi)
2667{
2668 edge e = loop_preheader_edge (loop);
2669 tree arg = PHI_ARG_DEF_FROM_EDGE (phi, e);
2670 gimple *stmt = SSA_NAME_DEF_STMT (arg);
2671 if (!gimple_assign_single_p (stmt))
2672 return NULL_TREE;
2673 tree memref = gimple_assign_rhs1 (stmt);
2674 if (TREE_CODE (memref) != MEM_REF)
2675 return NULL_TREE;
2676 tree addr = TREE_OPERAND (memref, 0);
2677
2678 gimple *stmt2 = SSA_NAME_DEF_STMT (addr);
2679 if (!gimple_assign_single_p (stmt2))
2680 return NULL_TREE;
2681 tree compref = gimple_assign_rhs1 (stmt2);
2682 if (TREE_CODE (compref) != COMPONENT_REF)
2683 return NULL_TREE;
2684 tree addr2 = TREE_OPERAND (compref, 0);
2685 if (TREE_CODE (addr2) != MEM_REF)
2686 return NULL_TREE;
2687 addr2 = TREE_OPERAND (addr2, 0);
2688 if (TREE_CODE (addr2) != SSA_NAME
2689 || addr2 != get_omp_data_i_param ())
2690 return NULL_TREE;
2691
2692 return addr;
2693}
2694
08dab97a
RL
2695/* Try to initialize REDUCTION_LIST for code generation part.
2696 REDUCTION_LIST describes the reductions. */
2697
2698static bool
4a8fb1a1 2699try_create_reduction_list (loop_p loop,
61d9c527
TV
2700 reduction_info_table_type *reduction_list,
2701 bool oacc_kernels_p)
08dab97a
RL
2702{
2703 edge exit = single_dom_exit (loop);
538dd0b7 2704 gphi_iterator gsi;
08dab97a
RL
2705
2706 gcc_assert (exit);
2707
f993a853
TV
2708 /* Try to get rid of exit phis. */
2709 final_value_replacement_loop (loop);
2710
08dab97a
RL
2711 gather_scalar_reductions (loop, reduction_list);
2712
b8698a0f 2713
08dab97a
RL
2714 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2715 {
538dd0b7 2716 gphi *phi = gsi.phi ();
08dab97a
RL
2717 struct reduction_info *red;
2718 imm_use_iterator imm_iter;
2719 use_operand_p use_p;
355fe088 2720 gimple *reduc_phi;
08dab97a
RL
2721 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2722
ea057359 2723 if (!virtual_operand_p (val))
08dab97a
RL
2724 {
2725 if (dump_file && (dump_flags & TDF_DETAILS))
2726 {
2727 fprintf (dump_file, "phi is ");
ef6cb4c7 2728 print_gimple_stmt (dump_file, phi, 0);
08dab97a 2729 fprintf (dump_file, "arg of phi to exit: value ");
ef6cb4c7 2730 print_generic_expr (dump_file, val);
08dab97a
RL
2731 fprintf (dump_file, " used outside loop\n");
2732 fprintf (dump_file,
430002b9 2733 " checking if it is part of reduction pattern:\n");
08dab97a 2734 }
c203e8a7 2735 if (reduction_list->elements () == 0)
08dab97a
RL
2736 {
2737 if (dump_file && (dump_flags & TDF_DETAILS))
2738 fprintf (dump_file,
2739 " FAILED: it is not a part of reduction.\n");
2740 return false;
2741 }
2742 reduc_phi = NULL;
2743 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2744 {
4942af9b
JJ
2745 if (!gimple_debug_bind_p (USE_STMT (use_p))
2746 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
08dab97a
RL
2747 {
2748 reduc_phi = USE_STMT (use_p);
2749 break;
2750 }
2751 }
2752 red = reduction_phi (reduction_list, reduc_phi);
2753 if (red == NULL)
2754 {
2755 if (dump_file && (dump_flags & TDF_DETAILS))
2756 fprintf (dump_file,
2757 " FAILED: it is not a part of reduction.\n");
2758 return false;
2759 }
23fab8ae
TV
2760 if (red->keep_res != NULL)
2761 {
2762 if (dump_file && (dump_flags & TDF_DETAILS))
2763 fprintf (dump_file,
2764 " FAILED: reduction has multiple exit phis.\n");
2765 return false;
2766 }
2767 red->keep_res = phi;
08dab97a
RL
2768 if (dump_file && (dump_flags & TDF_DETAILS))
2769 {
2770 fprintf (dump_file, "reduction phi is ");
ef6cb4c7 2771 print_gimple_stmt (dump_file, red->reduc_phi, 0);
08dab97a 2772 fprintf (dump_file, "reduction stmt is ");
ef6cb4c7 2773 print_gimple_stmt (dump_file, red->reduc_stmt, 0);
08dab97a
RL
2774 }
2775 }
2776 }
2777
2778 /* The iterations of the loop may communicate only through bivs whose
2779 iteration space can be distributed efficiently. */
2780 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2781 {
538dd0b7 2782 gphi *phi = gsi.phi ();
08dab97a
RL
2783 tree def = PHI_RESULT (phi);
2784 affine_iv iv;
2785
ea057359 2786 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
08dab97a
RL
2787 {
2788 struct reduction_info *red;
2789
2790 red = reduction_phi (reduction_list, phi);
2791 if (red == NULL)
2792 {
2793 if (dump_file && (dump_flags & TDF_DETAILS))
2794 fprintf (dump_file,
2795 " FAILED: scalar dependency between iterations\n");
2796 return false;
2797 }
2798 }
2799 }
2800
61d9c527
TV
2801 if (oacc_kernels_p)
2802 {
2803 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi);
2804 gsi_next (&gsi))
2805 {
2806 gphi *phi = gsi.phi ();
2807 tree def = PHI_RESULT (phi);
2808 affine_iv iv;
2809
2810 if (!virtual_operand_p (def)
2811 && !simple_iv (loop, loop, def, &iv, true))
2812 {
2813 tree addr = find_reduc_addr (loop, phi);
2814 if (addr == NULL_TREE)
2815 return false;
2816 struct reduction_info *red = reduction_phi (reduction_list, phi);
2817 red->reduc_addr = addr;
2818 }
2819 }
2820 }
08dab97a
RL
2821
2822 return true;
2823}
2824
3907c6cf
TV
2825/* Return true if LOOP contains phis with ADDR_EXPR in args. */
2826
2827static bool
2828loop_has_phi_with_address_arg (struct loop *loop)
2829{
2830 basic_block *bbs = get_loop_body (loop);
2831 bool res = false;
2832
2833 unsigned i, j;
2834 gphi_iterator gsi;
2835 for (i = 0; i < loop->num_nodes; i++)
2836 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
2837 {
2838 gphi *phi = gsi.phi ();
2839 for (j = 0; j < gimple_phi_num_args (phi); j++)
2840 {
2841 tree arg = gimple_phi_arg_def (phi, j);
2842 if (TREE_CODE (arg) == ADDR_EXPR)
2843 {
2844 /* This should be handled by eliminate_local_variables, but that
2845 function currently ignores phis. */
2846 res = true;
2847 goto end;
2848 }
2849 }
2850 }
2851 end:
2852 free (bbs);
61d9c527
TV
2853
2854 return res;
2855}
2856
2857/* Return true if memory ref REF (corresponding to the stmt at GSI in
2858 REGIONS_BB[I]) conflicts with the statements in REGIONS_BB[I] after gsi,
2859 or the statements in REGIONS_BB[I + n]. REF_IS_STORE indicates if REF is a
2860 store. Ignore conflicts with SKIP_STMT. */
2861
2862static bool
2863ref_conflicts_with_region (gimple_stmt_iterator gsi, ao_ref *ref,
2864 bool ref_is_store, vec<basic_block> region_bbs,
2865 unsigned int i, gimple *skip_stmt)
2866{
2867 basic_block bb = region_bbs[i];
2868 gsi_next (&gsi);
2869
2870 while (true)
2871 {
2872 for (; !gsi_end_p (gsi);
2873 gsi_next (&gsi))
2874 {
2875 gimple *stmt = gsi_stmt (gsi);
2876 if (stmt == skip_stmt)
2877 {
2878 if (dump_file)
2879 {
2880 fprintf (dump_file, "skipping reduction store: ");
ef6cb4c7 2881 print_gimple_stmt (dump_file, stmt, 0);
61d9c527
TV
2882 }
2883 continue;
2884 }
2885
2886 if (!gimple_vdef (stmt)
2887 && !gimple_vuse (stmt))
2888 continue;
2889
2890 if (gimple_code (stmt) == GIMPLE_RETURN)
2891 continue;
2892
2893 if (ref_is_store)
2894 {
2895 if (ref_maybe_used_by_stmt_p (stmt, ref))
2896 {
2897 if (dump_file)
2898 {
2899 fprintf (dump_file, "Stmt ");
ef6cb4c7 2900 print_gimple_stmt (dump_file, stmt, 0);
61d9c527
TV
2901 }
2902 return true;
2903 }
2904 }
2905 else
2906 {
2907 if (stmt_may_clobber_ref_p_1 (stmt, ref))
2908 {
2909 if (dump_file)
2910 {
2911 fprintf (dump_file, "Stmt ");
ef6cb4c7 2912 print_gimple_stmt (dump_file, stmt, 0);
61d9c527
TV
2913 }
2914 return true;
2915 }
2916 }
2917 }
2918 i++;
2919 if (i == region_bbs.length ())
2920 break;
2921 bb = region_bbs[i];
2922 gsi = gsi_start_bb (bb);
2923 }
2924
2925 return false;
2926}
2927
2928/* Return true if the bbs in REGION_BBS but not in in_loop_bbs can be executed
2929 in parallel with REGION_BBS containing the loop. Return the stores of
2930 reduction results in REDUCTION_STORES. */
2931
2932static bool
2933oacc_entry_exit_ok_1 (bitmap in_loop_bbs, vec<basic_block> region_bbs,
2934 reduction_info_table_type *reduction_list,
2935 bitmap reduction_stores)
2936{
2937 tree omp_data_i = get_omp_data_i_param ();
2938
2939 unsigned i;
2940 basic_block bb;
2941 FOR_EACH_VEC_ELT (region_bbs, i, bb)
2942 {
2943 if (bitmap_bit_p (in_loop_bbs, bb->index))
2944 continue;
2945
2946 gimple_stmt_iterator gsi;
2947 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
2948 gsi_next (&gsi))
2949 {
2950 gimple *stmt = gsi_stmt (gsi);
2951 gimple *skip_stmt = NULL;
2952
2953 if (is_gimple_debug (stmt)
2954 || gimple_code (stmt) == GIMPLE_COND)
2955 continue;
2956
2957 ao_ref ref;
2958 bool ref_is_store = false;
2959 if (gimple_assign_load_p (stmt))
2960 {
2961 tree rhs = gimple_assign_rhs1 (stmt);
2962 tree base = get_base_address (rhs);
2963 if (TREE_CODE (base) == MEM_REF
2964 && operand_equal_p (TREE_OPERAND (base, 0), omp_data_i, 0))
2965 continue;
2966
2967 tree lhs = gimple_assign_lhs (stmt);
2968 if (TREE_CODE (lhs) == SSA_NAME
2969 && has_single_use (lhs))
2970 {
2971 use_operand_p use_p;
2972 gimple *use_stmt;
2973 single_imm_use (lhs, &use_p, &use_stmt);
2974 if (gimple_code (use_stmt) == GIMPLE_PHI)
2975 {
2976 struct reduction_info *red;
2977 red = reduction_phi (reduction_list, use_stmt);
2978 tree val = PHI_RESULT (red->keep_res);
2979 if (has_single_use (val))
2980 {
2981 single_imm_use (val, &use_p, &use_stmt);
2982 if (gimple_store_p (use_stmt))
2983 {
2984 unsigned int id
2985 = SSA_NAME_VERSION (gimple_vdef (use_stmt));
2986 bitmap_set_bit (reduction_stores, id);
2987 skip_stmt = use_stmt;
2988 if (dump_file)
2989 {
2990 fprintf (dump_file, "found reduction load: ");
ef6cb4c7 2991 print_gimple_stmt (dump_file, stmt, 0);
61d9c527
TV
2992 }
2993 }
2994 }
2995 }
2996 }
2997
2998 ao_ref_init (&ref, rhs);
2999 }
3000 else if (gimple_store_p (stmt))
3001 {
3002 ao_ref_init (&ref, gimple_assign_lhs (stmt));
3003 ref_is_store = true;
3004 }
3005 else if (gimple_code (stmt) == GIMPLE_OMP_RETURN)
3006 continue;
3007 else if (!gimple_has_side_effects (stmt)
3008 && !gimple_could_trap_p (stmt)
3009 && !stmt_could_throw_p (stmt)
3010 && !gimple_vdef (stmt)
3011 && !gimple_vuse (stmt))
3012 continue;
8e4284d0 3013 else if (gimple_call_internal_p (stmt, IFN_GOACC_DIM_POS))
61d9c527
TV
3014 continue;
3015 else if (gimple_code (stmt) == GIMPLE_RETURN)
3016 continue;
3017 else
3018 {
3019 if (dump_file)
3020 {
3021 fprintf (dump_file, "Unhandled stmt in entry/exit: ");
ef6cb4c7 3022 print_gimple_stmt (dump_file, stmt, 0);
61d9c527
TV
3023 }
3024 return false;
3025 }
3026
3027 if (ref_conflicts_with_region (gsi, &ref, ref_is_store, region_bbs,
3028 i, skip_stmt))
3029 {
3030 if (dump_file)
3031 {
3032 fprintf (dump_file, "conflicts with entry/exit stmt: ");
ef6cb4c7 3033 print_gimple_stmt (dump_file, stmt, 0);
61d9c527
TV
3034 }
3035 return false;
3036 }
3037 }
3038 }
3039
3040 return true;
3041}
3042
3043/* Find stores inside REGION_BBS and outside IN_LOOP_BBS, and guard them with
3044 gang_pos == 0, except when the stores are REDUCTION_STORES. Return true
3045 if any changes were made. */
3046
3047static bool
3048oacc_entry_exit_single_gang (bitmap in_loop_bbs, vec<basic_block> region_bbs,
3049 bitmap reduction_stores)
3050{
3051 tree gang_pos = NULL_TREE;
3052 bool changed = false;
3053
3054 unsigned i;
3055 basic_block bb;
3056 FOR_EACH_VEC_ELT (region_bbs, i, bb)
3057 {
3058 if (bitmap_bit_p (in_loop_bbs, bb->index))
3059 continue;
3060
3061 gimple_stmt_iterator gsi;
3062 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
3063 {
3064 gimple *stmt = gsi_stmt (gsi);
3065
3066 if (!gimple_store_p (stmt))
3067 {
3068 /* Update gsi to point to next stmt. */
3069 gsi_next (&gsi);
3070 continue;
3071 }
3072
3073 if (bitmap_bit_p (reduction_stores,
3074 SSA_NAME_VERSION (gimple_vdef (stmt))))
3075 {
3076 if (dump_file)
3077 {
3078 fprintf (dump_file,
3079 "skipped reduction store for single-gang"
3080 " neutering: ");
ef6cb4c7 3081 print_gimple_stmt (dump_file, stmt, 0);
61d9c527
TV
3082 }
3083
3084 /* Update gsi to point to next stmt. */
3085 gsi_next (&gsi);
3086 continue;
3087 }
3088
3089 changed = true;
3090
3091 if (gang_pos == NULL_TREE)
3092 {
3093 tree arg = build_int_cst (integer_type_node, GOMP_DIM_GANG);
3094 gcall *gang_single
3095 = gimple_build_call_internal (IFN_GOACC_DIM_POS, 1, arg);
3096 gang_pos = make_ssa_name (integer_type_node);
3097 gimple_call_set_lhs (gang_single, gang_pos);
3098 gimple_stmt_iterator start
3099 = gsi_start_bb (single_succ (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
3100 tree vuse = ssa_default_def (cfun, gimple_vop (cfun));
3101 gimple_set_vuse (gang_single, vuse);
3102 gsi_insert_before (&start, gang_single, GSI_SAME_STMT);
3103 }
3104
3105 if (dump_file)
3106 {
3107 fprintf (dump_file,
3108 "found store that needs single-gang neutering: ");
ef6cb4c7 3109 print_gimple_stmt (dump_file, stmt, 0);
61d9c527
TV
3110 }
3111
3112 {
3113 /* Split block before store. */
3114 gimple_stmt_iterator gsi2 = gsi;
3115 gsi_prev (&gsi2);
3116 edge e;
3117 if (gsi_end_p (gsi2))
3118 {
3119 e = split_block_after_labels (bb);
3120 gsi2 = gsi_last_bb (bb);
3121 }
3122 else
3123 e = split_block (bb, gsi_stmt (gsi2));
3124 basic_block bb2 = e->dest;
3125
3126 /* Split block after store. */
3127 gimple_stmt_iterator gsi3 = gsi_start_bb (bb2);
3128 edge e2 = split_block (bb2, gsi_stmt (gsi3));
3129 basic_block bb3 = e2->dest;
3130
3131 gimple *cond
3132 = gimple_build_cond (EQ_EXPR, gang_pos, integer_zero_node,
3133 NULL_TREE, NULL_TREE);
3134 gsi_insert_after (&gsi2, cond, GSI_NEW_STMT);
3135
3136 edge e3 = make_edge (bb, bb3, EDGE_FALSE_VALUE);
357067f2
JH
3137 /* FIXME: What is the probability? */
3138 e3->probability = profile_probability::guessed_never ();
61d9c527
TV
3139 e->flags = EDGE_TRUE_VALUE;
3140
3141 tree vdef = gimple_vdef (stmt);
3142 tree vuse = gimple_vuse (stmt);
3143
3144 tree phi_res = copy_ssa_name (vdef);
3145 gphi *new_phi = create_phi_node (phi_res, bb3);
3146 replace_uses_by (vdef, phi_res);
3147 add_phi_arg (new_phi, vuse, e3, UNKNOWN_LOCATION);
3148 add_phi_arg (new_phi, vdef, e2, UNKNOWN_LOCATION);
3149
3150 /* Update gsi to point to next stmt. */
3151 bb = bb3;
3152 gsi = gsi_start_bb (bb);
3153 }
3154 }
3155 }
3156
3157 return changed;
3158}
3159
3160/* Return true if the statements before and after the LOOP can be executed in
3161 parallel with the function containing the loop. Resolve conflicting stores
3162 outside LOOP by guarding them such that only a single gang executes them. */
3163
3164static bool
3165oacc_entry_exit_ok (struct loop *loop,
3166 reduction_info_table_type *reduction_list)
3167{
3168 basic_block *loop_bbs = get_loop_body_in_dom_order (loop);
3169 vec<basic_block> region_bbs
3170 = get_all_dominated_blocks (CDI_DOMINATORS, ENTRY_BLOCK_PTR_FOR_FN (cfun));
3171
3172 bitmap in_loop_bbs = BITMAP_ALLOC (NULL);
3173 bitmap_clear (in_loop_bbs);
3174 for (unsigned int i = 0; i < loop->num_nodes; i++)
3175 bitmap_set_bit (in_loop_bbs, loop_bbs[i]->index);
3176
3177 bitmap reduction_stores = BITMAP_ALLOC (NULL);
3178 bool res = oacc_entry_exit_ok_1 (in_loop_bbs, region_bbs, reduction_list,
3179 reduction_stores);
3180
3181 if (res)
3182 {
3183 bool changed = oacc_entry_exit_single_gang (in_loop_bbs, region_bbs,
3184 reduction_stores);
3185 if (changed)
3186 {
3187 free_dominance_info (CDI_DOMINATORS);
3188 calculate_dominance_info (CDI_DOMINATORS);
3189 }
3190 }
3191
4089c340 3192 region_bbs.release ();
61d9c527
TV
3193 free (loop_bbs);
3194
3195 BITMAP_FREE (in_loop_bbs);
3196 BITMAP_FREE (reduction_stores);
3197
3907c6cf
TV
3198 return res;
3199}
3200
5f40b3cb
ZD
3201/* Detect parallel loops and generate parallel code using libgomp
3202 primitives. Returns true if some loop was parallelized, false
3203 otherwise. */
3204
09489eb8 3205static bool
61d9c527 3206parallelize_loops (bool oacc_kernels_p)
5f40b3cb 3207{
f99c3557 3208 unsigned n_threads;
5f40b3cb
ZD
3209 bool changed = false;
3210 struct loop *loop;
e67d7a1e 3211 struct loop *skip_loop = NULL;
5f40b3cb 3212 struct tree_niter_desc niter_desc;
f873b205 3213 struct obstack parloop_obstack;
8adfe01d 3214 HOST_WIDE_INT estimated;
b05e0233 3215 source_location loop_loc;
f873b205 3216
5f40b3cb 3217 /* Do not parallelize loops in the functions created by parallelization. */
61d9c527
TV
3218 if (!oacc_kernels_p
3219 && parallelized_function_p (cfun->decl))
5f40b3cb 3220 return false;
61d9c527
TV
3221
3222 /* Do not parallelize loops in offloaded functions. */
3223 if (!oacc_kernels_p
629b3d75 3224 && oacc_get_fn_attrib (cfun->decl) != NULL)
61d9c527
TV
3225 return false;
3226
8adfe01d
RL
3227 if (cfun->has_nonlocal_label)
3228 return false;
5f40b3cb 3229
f99c3557
TS
3230 /* For OpenACC kernels, n_threads will be determined later; otherwise, it's
3231 the argument to -ftree-parallelize-loops. */
3232 if (oacc_kernels_p)
3233 n_threads = 0;
3234 else
3235 n_threads = flag_tree_parallelize_loops;
3236
f873b205 3237 gcc_obstack_init (&parloop_obstack);
c203e8a7 3238 reduction_info_table_type reduction_list (10);
a509ebb5 3239
61d9c527
TV
3240 calculate_dominance_info (CDI_DOMINATORS);
3241
f0bd40b1 3242 FOR_EACH_LOOP (loop, 0)
5f40b3cb 3243 {
e67d7a1e
TV
3244 if (loop == skip_loop)
3245 {
61d9c527
TV
3246 if (!loop->in_oacc_kernels_region
3247 && dump_file && (dump_flags & TDF_DETAILS))
e67d7a1e
TV
3248 fprintf (dump_file,
3249 "Skipping loop %d as inner loop of parallelized loop\n",
3250 loop->num);
3251
3252 skip_loop = loop->inner;
3253 continue;
3254 }
3255 else
3256 skip_loop = NULL;
3257
4a8fb1a1 3258 reduction_list.empty ();
61d9c527
TV
3259
3260 if (oacc_kernels_p)
3261 {
3262 if (!loop->in_oacc_kernels_region)
3263 continue;
3264
3265 /* Don't try to parallelize inner loops in an oacc kernels region. */
3266 if (loop->inner)
3267 skip_loop = loop->inner;
3268
3269 if (dump_file && (dump_flags & TDF_DETAILS))
3270 fprintf (dump_file,
3271 "Trying loop %d with header bb %d in oacc kernels"
3272 " region\n", loop->num, loop->header->index);
3273 }
3274
48710229
RL
3275 if (dump_file && (dump_flags & TDF_DETAILS))
3276 {
3277 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
3278 if (loop->inner)
3279 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
3280 else
3281 fprintf (dump_file, "loop %d is innermost\n",loop->num);
3282 }
b8698a0f 3283
48710229 3284 /* If we use autopar in graphite pass, we use its marked dependency
87d4d0ee
SP
3285 checking results. */
3286 if (flag_loop_parallelize_all && !loop->can_be_parallel)
48710229
RL
3287 {
3288 if (dump_file && (dump_flags & TDF_DETAILS))
3289 fprintf (dump_file, "loop is not parallel according to graphite\n");
87d4d0ee 3290 continue;
48710229 3291 }
87d4d0ee 3292
48710229
RL
3293 if (!single_dom_exit (loop))
3294 {
b8698a0f 3295
48710229
RL
3296 if (dump_file && (dump_flags & TDF_DETAILS))
3297 fprintf (dump_file, "loop is !single_dom_exit\n");
b8698a0f 3298
08dab97a 3299 continue;
48710229 3300 }
08dab97a
RL
3301
3302 if (/* And of course, the loop must be parallelizable. */
3303 !can_duplicate_loop_p (loop)
1d4af1e8 3304 || loop_has_blocks_with_irreducible_flag (loop)
8adfe01d 3305 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
9857228c 3306 /* FIXME: the check for vector phi nodes could be removed. */
69958396 3307 || loop_has_vector_phi_nodes (loop))
08dab97a 3308 continue;
e5b332cd 3309
652c4c71 3310 estimated = estimated_stmt_executions_int (loop);
e5b332cd 3311 if (estimated == -1)
ae7a7472 3312 estimated = likely_max_stmt_executions_int (loop);
87d4d0ee 3313 /* FIXME: Bypass this check as graphite doesn't update the
e5b332cd 3314 count and frequency correctly now. */
87d4d0ee 3315 if (!flag_loop_parallelize_all
61d9c527 3316 && !oacc_kernels_p
e5b332cd
RG
3317 && ((estimated != -1
3318 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
87d4d0ee
SP
3319 /* Do not bother with loops in cold areas. */
3320 || optimize_loop_nest_for_size_p (loop)))
08dab97a 3321 continue;
b8698a0f 3322
08dab97a
RL
3323 if (!try_get_loop_niter (loop, &niter_desc))
3324 continue;
3325
61d9c527 3326 if (!try_create_reduction_list (loop, &reduction_list, oacc_kernels_p))
08dab97a
RL
3327 continue;
3328
3907c6cf
TV
3329 if (loop_has_phi_with_address_arg (loop))
3330 continue;
3331
f873b205
LB
3332 if (!flag_loop_parallelize_all
3333 && !loop_parallel_p (loop, &parloop_obstack))
5f40b3cb
ZD
3334 continue;
3335
61d9c527
TV
3336 if (oacc_kernels_p
3337 && !oacc_entry_exit_ok (loop, &reduction_list))
3338 {
3339 if (dump_file)
3340 fprintf (dump_file, "entry/exit not ok: FAILED\n");
3341 continue;
3342 }
3343
5f40b3cb 3344 changed = true;
e67d7a1e 3345 skip_loop = loop->inner;
558b3185
RB
3346
3347 loop_loc = find_loop_location (loop);
3348 if (loop->inner)
3349 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
3350 "parallelizing outer loop %d\n", loop->num);
3351 else
3352 dump_printf_loc (MSG_OPTIMIZED_LOCATIONS, loop_loc,
3353 "parallelizing inner loop %d\n", loop->num);
61d9c527 3354
c203e8a7 3355 gen_parallel_loop (loop, &reduction_list,
61d9c527 3356 n_threads, &niter_desc, oacc_kernels_p);
5f40b3cb
ZD
3357 }
3358
f873b205 3359 obstack_free (&parloop_obstack, NULL);
6b8ed145
RG
3360
3361 /* Parallelization will cause new function calls to be inserted through
d086d311
RG
3362 which local variables will escape. Reset the points-to solution
3363 for ESCAPED. */
6b8ed145 3364 if (changed)
d086d311 3365 pt_solution_reset (&cfun->gimple_df->escaped);
6b8ed145 3366
5f40b3cb
ZD
3367 return changed;
3368}
3369
c1bf2a39
AM
3370/* Parallelization. */
3371
c1bf2a39
AM
3372namespace {
3373
3374const pass_data pass_data_parallelize_loops =
3375{
3376 GIMPLE_PASS, /* type */
3377 "parloops", /* name */
3378 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
3379 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
3380 ( PROP_cfg | PROP_ssa ), /* properties_required */
3381 0, /* properties_provided */
3382 0, /* properties_destroyed */
3383 0, /* todo_flags_start */
3bea341f 3384 0, /* todo_flags_finish */
c1bf2a39
AM
3385};
3386
3387class pass_parallelize_loops : public gimple_opt_pass
3388{
3389public:
3390 pass_parallelize_loops (gcc::context *ctxt)
61d9c527
TV
3391 : gimple_opt_pass (pass_data_parallelize_loops, ctxt),
3392 oacc_kernels_p (false)
c1bf2a39
AM
3393 {}
3394
3395 /* opt_pass methods: */
f99c3557
TS
3396 virtual bool gate (function *)
3397 {
3398 if (oacc_kernels_p)
3399 return flag_openacc;
3400 else
3401 return flag_tree_parallelize_loops > 1;
3402 }
be55bfe6 3403 virtual unsigned int execute (function *);
61d9c527
TV
3404 opt_pass * clone () { return new pass_parallelize_loops (m_ctxt); }
3405 void set_pass_param (unsigned int n, bool param)
3406 {
3407 gcc_assert (n == 0);
3408 oacc_kernels_p = param;
3409 }
c1bf2a39 3410
61d9c527
TV
3411 private:
3412 bool oacc_kernels_p;
c1bf2a39
AM
3413}; // class pass_parallelize_loops
3414
be55bfe6
TS
3415unsigned
3416pass_parallelize_loops::execute (function *fun)
3417{
e9ff08b2
TV
3418 tree nthreads = builtin_decl_explicit (BUILT_IN_OMP_GET_NUM_THREADS);
3419 if (nthreads == NULL_TREE)
3420 return 0;
3421
12db0814
TV
3422 bool in_loop_pipeline = scev_initialized_p ();
3423 if (!in_loop_pipeline)
3424 loop_optimizer_init (LOOPS_NORMAL
3425 | LOOPS_HAVE_RECORDED_EXITS);
3426
3427 if (number_of_loops (fun) <= 1)
3428 return 0;
3429
3430 if (!in_loop_pipeline)
3431 {
3432 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3433 scev_initialize ();
3434 }
3435
3436 unsigned int todo = 0;
61d9c527 3437 if (parallelize_loops (oacc_kernels_p))
18751894
TV
3438 {
3439 fun->curr_properties &= ~(PROP_gimple_eomp);
e67d7a1e 3440
b2b29377 3441 checking_verify_loop_structure ();
e67d7a1e 3442
12db0814
TV
3443 todo |= TODO_update_ssa;
3444 }
3445
3446 if (!in_loop_pipeline)
3447 {
3448 scev_finalize ();
3449 loop_optimizer_finalize ();
18751894
TV
3450 }
3451
12db0814 3452 return todo;
be55bfe6
TS
3453}
3454
c1bf2a39
AM
3455} // anon namespace
3456
3457gimple_opt_pass *
3458make_pass_parallelize_loops (gcc::context *ctxt)
3459{
3460 return new pass_parallelize_loops (ctxt);
3461}