]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
Followup to [AArch64_be] Fix vtbl[34] and vtbx4
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
40e23961 25#include "alias.h"
c7131fb2 26#include "backend.h"
9fdcd34e 27#include "cfghooks.h"
40e23961 28#include "tree.h"
c7131fb2 29#include "gimple.h"
60393bbc 30#include "hard-reg-set.h"
c7131fb2
AM
31#include "ssa.h"
32#include "options.h"
33#include "fold-const.h"
2fb9a547 34#include "internal-fn.h"
45b0be94 35#include "gimplify.h"
5be5c238 36#include "gimple-iterator.h"
18f429e2 37#include "gimplify-me.h"
5be5c238 38#include "gimple-walk.h"
d8a2d370
DN
39#include "stor-layout.h"
40#include "tree-nested.h"
442b4905 41#include "tree-cfg.h"
e28030cf
AM
42#include "tree-ssa-loop-ivopts.h"
43#include "tree-ssa-loop-manip.h"
44#include "tree-ssa-loop-niter.h"
442b4905
AM
45#include "tree-ssa-loop.h"
46#include "tree-into-ssa.h"
5f40b3cb 47#include "cfgloop.h"
5f40b3cb 48#include "tree-data-ref.h"
1bd6497c 49#include "tree-scalar-evolution.h"
cf835838 50#include "gimple-pretty-print.h"
5f40b3cb 51#include "tree-pass.h"
5f40b3cb 52#include "langhooks.h"
a509ebb5 53#include "tree-vectorizer.h"
4a8fb1a1 54#include "tree-hasher.h"
c1bf2a39 55#include "tree-parloops.h"
0645c1a2 56#include "omp-low.h"
1fe37220 57#include "tree-nested.h"
a79b7ec5 58#include "cgraph.h"
7c82d827 59#include "tree-ssa.h"
f7f18684 60#include "params.h"
1f600fea 61#include "params-enum.h"
5f40b3cb
ZD
62
63/* This pass tries to distribute iterations of loops into several threads.
64 The implementation is straightforward -- for each loop we test whether its
65 iterations are independent, and if it is the case (and some additional
66 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
67 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
68 machinery do its job.
b8698a0f 69
5f40b3cb
ZD
70 The most of the complexity is in bringing the code into shape expected
71 by the omp expanders:
726a989a
RB
72 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
73 variable and that the exit test is at the start of the loop body
74 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
75 variables by accesses through pointers, and breaking up ssa chains
76 by storing the values incoming to the parallelized loop to a structure
77 passed to the new function as an argument (something similar is done
78 in omp gimplification, unfortunately only a small part of the code
79 can be shared).
80
81 TODO:
82 -- if there are several parallelizable loops in a function, it may be
83 possible to generate the threads just once (using synchronization to
84 ensure that cross-loop dependences are obeyed).
70837b71
RL
85 -- handling of common reduction patterns for outer loops.
86
87 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 88/*
a509ebb5 89 Reduction handling:
8a9ecffd 90 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 91 The code transformation will be introduced by an example.
b8698a0f
L
92
93
a509ebb5
RL
94parloop
95{
96 int sum=1;
97
0eb7e7aa 98 for (i = 0; i < N; i++)
a509ebb5
RL
99 {
100 x[i] = i + 3;
101 sum+=x[i];
102 }
103}
104
0eb7e7aa 105gimple-like code:
a509ebb5
RL
106header_bb:
107
0eb7e7aa
RL
108 # sum_29 = PHI <sum_11(5), 1(3)>
109 # i_28 = PHI <i_12(5), 0(3)>
110 D.1795_8 = i_28 + 3;
111 x[i_28] = D.1795_8;
112 sum_11 = D.1795_8 + sum_29;
113 i_12 = i_28 + 1;
114 if (N_6(D) > i_12)
115 goto header_bb;
116
a509ebb5
RL
117
118exit_bb:
119
0eb7e7aa
RL
120 # sum_21 = PHI <sum_11(4)>
121 printf (&"%d"[0], sum_21);
a509ebb5
RL
122
123
124after reduction transformation (only relevant parts):
125
126parloop
127{
128
129....
130
0eb7e7aa 131
fa10beec 132 # Storing the initial value given by the user. #
0eb7e7aa 133
ae0bce62 134 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
135
136 #pragma omp parallel num_threads(4)
a509ebb5 137
0eb7e7aa 138 #pragma omp for schedule(static)
ae0bce62
RL
139
140 # The neutral element corresponding to the particular
141 reduction's operation, e.g. 0 for PLUS_EXPR,
142 1 for MULT_EXPR, etc. replaces the user's initial value. #
143
144 # sum.27_29 = PHI <sum.27_11, 0>
145
0eb7e7aa 146 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 147
726a989a 148 GIMPLE_OMP_CONTINUE
a509ebb5 149
0eb7e7aa
RL
150 # Adding this reduction phi is done at create_phi_for_local_result() #
151 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 152 GIMPLE_OMP_RETURN
b8698a0f
L
153
154 # Creating the atomic operation is done at
0eb7e7aa 155 create_call_for_reduction_1() #
a509ebb5 156
0eb7e7aa
RL
157 #pragma omp atomic_load
158 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
159 D.1840_60 = sum.27_56 + D.1839_59;
160 #pragma omp atomic_store (D.1840_60);
b8698a0f 161
726a989a 162 GIMPLE_OMP_RETURN
b8698a0f 163
0eb7e7aa
RL
164 # collecting the result after the join of the threads is done at
165 create_loads_for_reductions().
ae0bce62
RL
166 The value computed by the threads is loaded from the
167 shared struct. #
168
b8698a0f 169
0eb7e7aa 170 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 171 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
172 sum_43 = D.1795_41 + sum_37;
173
174 exit bb:
175 # sum_21 = PHI <sum_43, sum_26>
176 printf (&"%d"[0], sum_21);
177
178...
179
a509ebb5
RL
180}
181
182*/
183
5f40b3cb
ZD
184/* Minimal number of iterations of a loop that should be executed in each
185 thread. */
186#define MIN_PER_THREAD 100
187
b8698a0f 188/* Element of the hashtable, representing a
a509ebb5
RL
189 reduction in the current loop. */
190struct reduction_info
191{
355fe088
TS
192 gimple *reduc_stmt; /* reduction statement. */
193 gimple *reduc_phi; /* The phi node defining the reduction. */
726a989a 194 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
195 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
196 result. */
538dd0b7 197 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 198 of the reduction variable when existing the loop. */
ae0bce62 199 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 200 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5 201 tree init; /* reduction initialization value. */
538dd0b7 202 gphi *new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
203 will be passed to the atomic operation. Represents
204 the local result each thread computed for the reduction
205 operation. */
206};
207
4a8fb1a1 208/* Reduction info hashtable helpers. */
a509ebb5 209
95fbe13e 210struct reduction_hasher : free_ptr_hash <reduction_info>
a509ebb5 211{
67f58944
TS
212 static inline hashval_t hash (const reduction_info *);
213 static inline bool equal (const reduction_info *, const reduction_info *);
4a8fb1a1
LC
214};
215
216/* Equality and hash functions for hashtab code. */
a509ebb5 217
4a8fb1a1 218inline bool
67f58944 219reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
4a8fb1a1 220{
a509ebb5
RL
221 return (a->reduc_phi == b->reduc_phi);
222}
223
4a8fb1a1 224inline hashval_t
67f58944 225reduction_hasher::hash (const reduction_info *a)
a509ebb5 226{
5d1fd1de 227 return a->reduc_version;
a509ebb5
RL
228}
229
c203e8a7 230typedef hash_table<reduction_hasher> reduction_info_table_type;
4a8fb1a1
LC
231
232
a509ebb5 233static struct reduction_info *
355fe088 234reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
a509ebb5
RL
235{
236 struct reduction_info tmpred, *red;
237
c203e8a7 238 if (reduction_list->elements () == 0 || phi == NULL)
a509ebb5
RL
239 return NULL;
240
241 tmpred.reduc_phi = phi;
5d1fd1de 242 tmpred.reduc_version = gimple_uid (phi);
c203e8a7 243 red = reduction_list->find (&tmpred);
a509ebb5
RL
244
245 return red;
246}
247
5f40b3cb
ZD
248/* Element of hashtable of names to copy. */
249
250struct name_to_copy_elt
251{
252 unsigned version; /* The version of the name to copy. */
253 tree new_name; /* The new name used in the copy. */
254 tree field; /* The field of the structure used to pass the
255 value. */
256};
257
4a8fb1a1 258/* Name copies hashtable helpers. */
5f40b3cb 259
95fbe13e 260struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
5f40b3cb 261{
67f58944
TS
262 static inline hashval_t hash (const name_to_copy_elt *);
263 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
4a8fb1a1
LC
264};
265
266/* Equality and hash functions for hashtab code. */
5f40b3cb 267
4a8fb1a1 268inline bool
67f58944 269name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
4a8fb1a1 270{
5f40b3cb
ZD
271 return a->version == b->version;
272}
273
4a8fb1a1 274inline hashval_t
67f58944 275name_to_copy_hasher::hash (const name_to_copy_elt *a)
5f40b3cb 276{
5f40b3cb
ZD
277 return (hashval_t) a->version;
278}
279
c203e8a7 280typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
4a8fb1a1 281
b305e3da
SP
282/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
283 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
284 represents the denominator for every element in the matrix. */
285typedef struct lambda_trans_matrix_s
286{
287 lambda_matrix matrix;
288 int rowsize;
289 int colsize;
290 int denominator;
291} *lambda_trans_matrix;
292#define LTM_MATRIX(T) ((T)->matrix)
293#define LTM_ROWSIZE(T) ((T)->rowsize)
294#define LTM_COLSIZE(T) ((T)->colsize)
295#define LTM_DENOMINATOR(T) ((T)->denominator)
296
297/* Allocate a new transformation matrix. */
298
299static lambda_trans_matrix
300lambda_trans_matrix_new (int colsize, int rowsize,
301 struct obstack * lambda_obstack)
302{
303 lambda_trans_matrix ret;
304
305 ret = (lambda_trans_matrix)
306 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
307 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
308 LTM_ROWSIZE (ret) = rowsize;
309 LTM_COLSIZE (ret) = colsize;
310 LTM_DENOMINATOR (ret) = 1;
311 return ret;
312}
313
314/* Multiply a vector VEC by a matrix MAT.
315 MAT is an M*N matrix, and VEC is a vector with length N. The result
316 is stored in DEST which must be a vector of length M. */
317
318static void
319lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
320 lambda_vector vec, lambda_vector dest)
321{
322 int i, j;
323
324 lambda_vector_clear (dest, m);
325 for (i = 0; i < m; i++)
326 for (j = 0; j < n; j++)
327 dest[i] += matrix[i][j] * vec[j];
328}
329
330/* Return true if TRANS is a legal transformation matrix that respects
331 the dependence vectors in DISTS and DIRS. The conservative answer
332 is false.
333
334 "Wolfe proves that a unimodular transformation represented by the
335 matrix T is legal when applied to a loop nest with a set of
336 lexicographically non-negative distance vectors RDG if and only if
337 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
338 i.e.: if and only if it transforms the lexicographically positive
339 distance vectors to lexicographically positive vectors. Note that
340 a unimodular matrix must transform the zero vector (and only it) to
341 the zero vector." S.Muchnick. */
342
343static bool
344lambda_transform_legal_p (lambda_trans_matrix trans,
345 int nb_loops,
9771b263 346 vec<ddr_p> dependence_relations)
b305e3da
SP
347{
348 unsigned int i, j;
349 lambda_vector distres;
350 struct data_dependence_relation *ddr;
351
352 gcc_assert (LTM_COLSIZE (trans) == nb_loops
353 && LTM_ROWSIZE (trans) == nb_loops);
354
355 /* When there are no dependences, the transformation is correct. */
9771b263 356 if (dependence_relations.length () == 0)
b305e3da
SP
357 return true;
358
9771b263 359 ddr = dependence_relations[0];
b305e3da
SP
360 if (ddr == NULL)
361 return true;
362
363 /* When there is an unknown relation in the dependence_relations, we
364 know that it is no worth looking at this loop nest: give up. */
365 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
366 return false;
367
368 distres = lambda_vector_new (nb_loops);
369
370 /* For each distance vector in the dependence graph. */
9771b263 371 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
b305e3da
SP
372 {
373 /* Don't care about relations for which we know that there is no
374 dependence, nor about read-read (aka. output-dependences):
375 these data accesses can happen in any order. */
376 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
377 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
378 continue;
379
380 /* Conservatively answer: "this transformation is not valid". */
381 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
382 return false;
383
384 /* If the dependence could not be captured by a distance vector,
385 conservatively answer that the transform is not valid. */
386 if (DDR_NUM_DIST_VECTS (ddr) == 0)
387 return false;
388
389 /* Compute trans.dist_vect */
390 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
391 {
392 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
393 DDR_DIST_VECT (ddr, j), distres);
394
395 if (!lambda_vector_lexico_pos (distres, nb_loops))
396 return false;
397 }
398 }
399 return true;
400}
08dab97a
RL
401
402/* Data dependency analysis. Returns true if the iterations of LOOP
403 are independent on each other (that is, if we can execute them
404 in parallel). */
5f40b3cb
ZD
405
406static bool
f873b205 407loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 408{
9771b263
DN
409 vec<ddr_p> dependence_relations;
410 vec<data_reference_p> datarefs;
5f40b3cb
ZD
411 lambda_trans_matrix trans;
412 bool ret = false;
5f40b3cb
ZD
413
414 if (dump_file && (dump_flags & TDF_DETAILS))
48710229
RL
415 {
416 fprintf (dump_file, "Considering loop %d\n", loop->num);
417 if (!loop->inner)
418 fprintf (dump_file, "loop is innermost\n");
b8698a0f 419 else
48710229
RL
420 fprintf (dump_file, "loop NOT innermost\n");
421 }
5f40b3cb 422
5f40b3cb
ZD
423 /* Check for problems with dependences. If the loop can be reversed,
424 the iterations are independent. */
00f96dc9 425 auto_vec<loop_p, 3> loop_nest;
9771b263 426 datarefs.create (10);
07687835 427 dependence_relations.create (100);
9ca3d00e
AB
428 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
429 &dependence_relations))
430 {
431 if (dump_file && (dump_flags & TDF_DETAILS))
432 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
433 ret = false;
434 goto end;
435 }
5f40b3cb
ZD
436 if (dump_file && (dump_flags & TDF_DETAILS))
437 dump_data_dependence_relations (dump_file, dependence_relations);
438
f873b205 439 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
5f40b3cb
ZD
440 LTM_MATRIX (trans)[0][0] = -1;
441
442 if (lambda_transform_legal_p (trans, 1, dependence_relations))
443 {
444 ret = true;
445 if (dump_file && (dump_flags & TDF_DETAILS))
446 fprintf (dump_file, " SUCCESS: may be parallelized\n");
447 }
448 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
449 fprintf (dump_file,
450 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 451
9ca3d00e 452 end:
5f40b3cb
ZD
453 free_dependence_relations (dependence_relations);
454 free_data_refs (datarefs);
455
456 return ret;
457}
458
1d4af1e8
SP
459/* Return true when LOOP contains basic blocks marked with the
460 BB_IRREDUCIBLE_LOOP flag. */
461
462static inline bool
463loop_has_blocks_with_irreducible_flag (struct loop *loop)
464{
465 unsigned i;
466 basic_block *bbs = get_loop_body_in_dom_order (loop);
467 bool res = true;
468
469 for (i = 0; i < loop->num_nodes; i++)
470 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
471 goto end;
472
473 res = false;
474 end:
475 free (bbs);
476 return res;
477}
478
8a171a59 479/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 480 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 481 to their addresses that can be reused. The address of OBJ is known to
cba1eb61
JJ
482 be invariant in the whole function. Other needed statements are placed
483 right before GSI. */
5f40b3cb
ZD
484
485static tree
4a8fb1a1 486take_address_of (tree obj, tree type, edge entry,
c203e8a7 487 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
5f40b3cb 488{
8a171a59 489 int uid;
83d5977e 490 tree *var_p, name, addr;
538dd0b7 491 gassign *stmt;
726a989a 492 gimple_seq stmts;
5f40b3cb 493
8a171a59
ZD
494 /* Since the address of OBJ is invariant, the trees may be shared.
495 Avoid rewriting unrelated parts of the code. */
496 obj = unshare_expr (obj);
497 for (var_p = &obj;
498 handled_component_p (*var_p);
499 var_p = &TREE_OPERAND (*var_p, 0))
500 continue;
8a171a59 501
c9a410f0
RG
502 /* Canonicalize the access to base on a MEM_REF. */
503 if (DECL_P (*var_p))
504 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
505
506 /* Assign a canonical SSA name to the address of the base decl used
507 in the address and share it for all accesses and addresses based
508 on it. */
509 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
84baa4b9
TS
510 int_tree_map elt;
511 elt.uid = uid;
512 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
513 if (!slot->to)
5f40b3cb 514 {
cba1eb61
JJ
515 if (gsi == NULL)
516 return NULL;
c9a410f0 517 addr = TREE_OPERAND (*var_p, 0);
29b89442
JJ
518 const char *obj_name
519 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
520 if (obj_name)
521 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
522 else
b731b390 523 name = make_ssa_name (TREE_TYPE (addr));
83d5977e 524 stmt = gimple_build_assign (name, addr);
726a989a 525 gsi_insert_on_edge_immediate (entry, stmt);
5f40b3cb 526
84baa4b9
TS
527 slot->uid = uid;
528 slot->to = name;
5f40b3cb 529 }
8a171a59 530 else
84baa4b9 531 name = slot->to;
5f40b3cb 532
c9a410f0
RG
533 /* Express the address in terms of the canonical SSA name. */
534 TREE_OPERAND (*var_p, 0) = name;
cba1eb61
JJ
535 if (gsi == NULL)
536 return build_fold_addr_expr_with_type (obj, type);
537
c9a410f0
RG
538 name = force_gimple_operand (build_addr (obj, current_function_decl),
539 &stmts, true, NULL_TREE);
540 if (!gimple_seq_empty_p (stmts))
cba1eb61 541 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 542
c9a410f0 543 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 544 {
726a989a 545 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 546 NULL_TREE);
726a989a 547 if (!gimple_seq_empty_p (stmts))
cba1eb61 548 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 549 }
5f40b3cb
ZD
550
551 return name;
552}
553
12efb1d7 554static tree
355fe088 555reduc_stmt_res (gimple *stmt)
12efb1d7
TV
556{
557 return (gimple_code (stmt) == GIMPLE_PHI
558 ? gimple_phi_result (stmt)
559 : gimple_assign_lhs (stmt));
560}
561
a509ebb5 562/* Callback for htab_traverse. Create the initialization statement
b8698a0f 563 for reduction described in SLOT, and place it at the preheader of
a509ebb5
RL
564 the loop described in DATA. */
565
4a8fb1a1
LC
566int
567initialize_reductions (reduction_info **slot, struct loop *loop)
a509ebb5 568{
f2c9f71d
TS
569 tree init;
570 tree type, arg;
a509ebb5
RL
571 edge e;
572
4a8fb1a1 573 struct reduction_info *const reduc = *slot;
a509ebb5 574
b8698a0f 575 /* Create initialization in preheader:
a509ebb5
RL
576 reduction_variable = initialization value of reduction. */
577
b8698a0f 578 /* In the phi node at the header, replace the argument coming
a509ebb5
RL
579 from the preheader with the reduction initialization value. */
580
f2c9f71d 581 /* Initialize the reduction. */
a509ebb5 582 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
f2c9f71d
TS
583 init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
584 reduc->reduction_code, type);
a509ebb5
RL
585 reduc->init = init;
586
b8698a0f
L
587 /* Replace the argument representing the initialization value
588 with the initialization value for the reduction (neutral
589 element for the particular operation, e.g. 0 for PLUS_EXPR,
590 1 for MULT_EXPR, etc).
591 Keep the old value in a new variable "reduction_initial",
592 that will be taken in consideration after the parallel
0eb7e7aa 593 computing is done. */
a509ebb5
RL
594
595 e = loop_preheader_edge (loop);
596 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
597 /* Create new variable to hold the initial value. */
a509ebb5 598
a509ebb5 599 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 600 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 601 reduc->initial_value = arg;
a509ebb5
RL
602 return 1;
603}
5f40b3cb
ZD
604
605struct elv_data
606{
726a989a 607 struct walk_stmt_info info;
9f9f72aa 608 edge entry;
c203e8a7 609 int_tree_htab_type *decl_address;
cba1eb61 610 gimple_stmt_iterator *gsi;
5f40b3cb 611 bool changed;
cba1eb61 612 bool reset;
5f40b3cb
ZD
613};
614
9f9f72aa
AP
615/* Eliminates references to local variables in *TP out of the single
616 entry single exit region starting at DTA->ENTRY.
617 DECL_ADDRESS contains addresses of the references that had their
618 address taken already. If the expression is changed, CHANGED is
619 set to true. Callback for walk_tree. */
a509ebb5 620
5f40b3cb 621static tree
8a171a59 622eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 623{
3d9a9f94 624 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 625 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
626
627 if (DECL_P (t))
628 {
629 *walk_subtrees = 0;
630
631 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
632 return NULL_TREE;
633
634 type = TREE_TYPE (t);
635 addr_type = build_pointer_type (type);
cba1eb61
JJ
636 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
637 dta->gsi);
638 if (dta->gsi == NULL && addr == NULL_TREE)
639 {
640 dta->reset = true;
641 return NULL_TREE;
642 }
643
70f34814 644 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
645
646 dta->changed = true;
647 return NULL_TREE;
648 }
649
650 if (TREE_CODE (t) == ADDR_EXPR)
651 {
8a171a59
ZD
652 /* ADDR_EXPR may appear in two contexts:
653 -- as a gimple operand, when the address taken is a function invariant
654 -- as gimple rhs, when the resulting address in not a function
655 invariant
656 We do not need to do anything special in the latter case (the base of
657 the memory reference whose address is taken may be replaced in the
658 DECL_P case). The former case is more complicated, as we need to
659 ensure that the new address is still a gimple operand. Thus, it
660 is not sufficient to replace just the base of the memory reference --
661 we need to move the whole computation of the address out of the
662 loop. */
663 if (!is_gimple_val (t))
5f40b3cb
ZD
664 return NULL_TREE;
665
666 *walk_subtrees = 0;
8a171a59
ZD
667 obj = TREE_OPERAND (t, 0);
668 var = get_base_address (obj);
669 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
670 return NULL_TREE;
671
672 addr_type = TREE_TYPE (t);
cba1eb61
JJ
673 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
674 dta->gsi);
675 if (dta->gsi == NULL && addr == NULL_TREE)
676 {
677 dta->reset = true;
678 return NULL_TREE;
679 }
5f40b3cb
ZD
680 *tp = addr;
681
682 dta->changed = true;
683 return NULL_TREE;
684 }
685
726a989a 686 if (!EXPR_P (t))
5f40b3cb
ZD
687 *walk_subtrees = 0;
688
689 return NULL_TREE;
690}
691
cba1eb61 692/* Moves the references to local variables in STMT at *GSI out of the single
9f9f72aa
AP
693 entry single exit region starting at ENTRY. DECL_ADDRESS contains
694 addresses of the references that had their address taken
695 already. */
5f40b3cb
ZD
696
697static void
cba1eb61 698eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
c203e8a7 699 int_tree_htab_type *decl_address)
5f40b3cb
ZD
700{
701 struct elv_data dta;
355fe088 702 gimple *stmt = gsi_stmt (*gsi);
5f40b3cb 703
726a989a 704 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 705 dta.entry = entry;
5f40b3cb
ZD
706 dta.decl_address = decl_address;
707 dta.changed = false;
cba1eb61 708 dta.reset = false;
5f40b3cb 709
b5b8b0ac 710 if (gimple_debug_bind_p (stmt))
cba1eb61
JJ
711 {
712 dta.gsi = NULL;
713 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
714 eliminate_local_variables_1, &dta.info, NULL);
715 if (dta.reset)
716 {
717 gimple_debug_bind_reset_value (stmt);
718 dta.changed = true;
719 }
720 }
29b89442
JJ
721 else if (gimple_clobber_p (stmt))
722 {
723 stmt = gimple_build_nop ();
724 gsi_replace (gsi, stmt, false);
725 dta.changed = true;
726 }
b5b8b0ac 727 else
cba1eb61
JJ
728 {
729 dta.gsi = gsi;
730 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
731 }
5f40b3cb
ZD
732
733 if (dta.changed)
734 update_stmt (stmt);
735}
736
9f9f72aa
AP
737/* Eliminates the references to local variables from the single entry
738 single exit region between the ENTRY and EXIT edges.
b8698a0f 739
a509ebb5 740 This includes:
b8698a0f
L
741 1) Taking address of a local variable -- these are moved out of the
742 region (and temporary variable is created to hold the address if
a509ebb5 743 necessary).
9f9f72aa 744
5f40b3cb 745 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 746 references. */
5f40b3cb
ZD
747
748static void
9f9f72aa 749eliminate_local_variables (edge entry, edge exit)
5f40b3cb 750{
9f9f72aa 751 basic_block bb;
00f96dc9 752 auto_vec<basic_block, 3> body;
5f40b3cb 753 unsigned i;
726a989a 754 gimple_stmt_iterator gsi;
cba1eb61 755 bool has_debug_stmt = false;
c203e8a7 756 int_tree_htab_type decl_address (10);
9f9f72aa
AP
757 basic_block entry_bb = entry->src;
758 basic_block exit_bb = exit->dest;
5f40b3cb 759
9f9f72aa 760 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 761
9771b263 762 FOR_EACH_VEC_ELT (body, i, bb)
9f9f72aa 763 if (bb != entry_bb && bb != exit_bb)
726a989a 764 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
ddb555ed
JJ
765 if (is_gimple_debug (gsi_stmt (gsi)))
766 {
767 if (gimple_debug_bind_p (gsi_stmt (gsi)))
768 has_debug_stmt = true;
769 }
cba1eb61 770 else
c203e8a7 771 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
cba1eb61
JJ
772
773 if (has_debug_stmt)
9771b263 774 FOR_EACH_VEC_ELT (body, i, bb)
cba1eb61
JJ
775 if (bb != entry_bb && bb != exit_bb)
776 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
777 if (gimple_debug_bind_p (gsi_stmt (gsi)))
c203e8a7 778 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
9f9f72aa
AP
779}
780
781/* Returns true if expression EXPR is not defined between ENTRY and
782 EXIT, i.e. if all its operands are defined outside of the region. */
783
784static bool
785expr_invariant_in_region_p (edge entry, edge exit, tree expr)
786{
787 basic_block entry_bb = entry->src;
788 basic_block exit_bb = exit->dest;
789 basic_block def_bb;
9f9f72aa
AP
790
791 if (is_gimple_min_invariant (expr))
792 return true;
793
794 if (TREE_CODE (expr) == SSA_NAME)
795 {
726a989a 796 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
797 if (def_bb
798 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
799 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
800 return false;
801
802 return true;
803 }
804
726a989a 805 return false;
5f40b3cb
ZD
806}
807
808/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
809 The copies are stored to NAME_COPIES, if NAME was already duplicated,
810 its duplicate stored in NAME_COPIES is returned.
b8698a0f 811
5f40b3cb
ZD
812 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
813 duplicated, storing the copies in DECL_COPIES. */
814
815static tree
c203e8a7
TS
816separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
817 int_tree_htab_type *decl_copies,
818 bool copy_name_p)
5f40b3cb
ZD
819{
820 tree copy, var, var_copy;
821 unsigned idx, uid, nuid;
84baa4b9 822 struct int_tree_map ielt;
5f40b3cb 823 struct name_to_copy_elt elt, *nelt;
4a8fb1a1 824 name_to_copy_elt **slot;
84baa4b9 825 int_tree_map *dslot;
5f40b3cb
ZD
826
827 if (TREE_CODE (name) != SSA_NAME)
828 return name;
829
830 idx = SSA_NAME_VERSION (name);
831 elt.version = idx;
c203e8a7
TS
832 slot = name_copies->find_slot_with_hash (&elt, idx,
833 copy_name_p ? INSERT : NO_INSERT);
5f40b3cb 834 if (slot && *slot)
4a8fb1a1 835 return (*slot)->new_name;
5f40b3cb 836
70b5e7dc
RG
837 if (copy_name_p)
838 {
839 copy = duplicate_ssa_name (name, NULL);
840 nelt = XNEW (struct name_to_copy_elt);
841 nelt->version = idx;
842 nelt->new_name = copy;
843 nelt->field = NULL_TREE;
844 *slot = nelt;
845 }
846 else
847 {
848 gcc_assert (!slot);
849 copy = name;
850 }
851
5f40b3cb 852 var = SSA_NAME_VAR (name);
70b5e7dc
RG
853 if (!var)
854 return copy;
855
5f40b3cb
ZD
856 uid = DECL_UID (var);
857 ielt.uid = uid;
84baa4b9
TS
858 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
859 if (!dslot->to)
5f40b3cb
ZD
860 {
861 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 862 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
84baa4b9
TS
863 dslot->uid = uid;
864 dslot->to = var_copy;
5f40b3cb
ZD
865
866 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 867 it again. */
5f40b3cb
ZD
868 nuid = DECL_UID (var_copy);
869 ielt.uid = nuid;
84baa4b9
TS
870 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
871 gcc_assert (!dslot->to);
872 dslot->uid = nuid;
873 dslot->to = var_copy;
5f40b3cb
ZD
874 }
875 else
84baa4b9 876 var_copy = dslot->to;
5f40b3cb 877
b2ec94d4 878 replace_ssa_name_symbol (copy, var_copy);
5f40b3cb
ZD
879 return copy;
880}
881
9f9f72aa
AP
882/* Finds the ssa names used in STMT that are defined outside the
883 region between ENTRY and EXIT and replaces such ssa names with
884 their duplicates. The duplicates are stored to NAME_COPIES. Base
885 decls of all ssa names used in STMT (including those defined in
886 LOOP) are replaced with the new temporary variables; the
887 replacement decls are stored in DECL_COPIES. */
5f40b3cb
ZD
888
889static void
355fe088 890separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
c203e8a7
TS
891 name_to_copy_table_type *name_copies,
892 int_tree_htab_type *decl_copies)
5f40b3cb
ZD
893{
894 use_operand_p use;
895 def_operand_p def;
896 ssa_op_iter oi;
897 tree name, copy;
898 bool copy_name_p;
899
5f40b3cb 900 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
901 {
902 name = DEF_FROM_PTR (def);
903 gcc_assert (TREE_CODE (name) == SSA_NAME);
9f9f72aa
AP
904 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
905 false);
a509ebb5
RL
906 gcc_assert (copy == name);
907 }
5f40b3cb
ZD
908
909 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
910 {
911 name = USE_FROM_PTR (use);
912 if (TREE_CODE (name) != SSA_NAME)
913 continue;
914
9f9f72aa
AP
915 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
916 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
917 copy_name_p);
a509ebb5
RL
918 SET_USE (use, copy);
919 }
5f40b3cb
ZD
920}
921
b5b8b0ac
AO
922/* Finds the ssa names used in STMT that are defined outside the
923 region between ENTRY and EXIT and replaces such ssa names with
924 their duplicates. The duplicates are stored to NAME_COPIES. Base
925 decls of all ssa names used in STMT (including those defined in
926 LOOP) are replaced with the new temporary variables; the
927 replacement decls are stored in DECL_COPIES. */
928
929static bool
355fe088 930separate_decls_in_region_debug (gimple *stmt,
c203e8a7
TS
931 name_to_copy_table_type *name_copies,
932 int_tree_htab_type *decl_copies)
b5b8b0ac
AO
933{
934 use_operand_p use;
935 ssa_op_iter oi;
936 tree var, name;
937 struct int_tree_map ielt;
938 struct name_to_copy_elt elt;
4a8fb1a1 939 name_to_copy_elt **slot;
84baa4b9 940 int_tree_map *dslot;
b5b8b0ac 941
ddb555ed
JJ
942 if (gimple_debug_bind_p (stmt))
943 var = gimple_debug_bind_get_var (stmt);
944 else if (gimple_debug_source_bind_p (stmt))
945 var = gimple_debug_source_bind_get_var (stmt);
946 else
947 return true;
598e67d7 948 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
4f2a9af8 949 return true;
b5b8b0ac
AO
950 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
951 ielt.uid = DECL_UID (var);
84baa4b9 952 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
b5b8b0ac
AO
953 if (!dslot)
954 return true;
ddb555ed 955 if (gimple_debug_bind_p (stmt))
84baa4b9 956 gimple_debug_bind_set_var (stmt, dslot->to);
ddb555ed 957 else if (gimple_debug_source_bind_p (stmt))
84baa4b9 958 gimple_debug_source_bind_set_var (stmt, dslot->to);
b5b8b0ac
AO
959
960 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
961 {
962 name = USE_FROM_PTR (use);
963 if (TREE_CODE (name) != SSA_NAME)
964 continue;
965
966 elt.version = SSA_NAME_VERSION (name);
c203e8a7 967 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
b5b8b0ac
AO
968 if (!slot)
969 {
970 gimple_debug_bind_reset_value (stmt);
971 update_stmt (stmt);
972 break;
973 }
974
4a8fb1a1 975 SET_USE (use, (*slot)->new_name);
b5b8b0ac
AO
976 }
977
978 return false;
979}
980
0eb7e7aa
RL
981/* Callback for htab_traverse. Adds a field corresponding to the reduction
982 specified in SLOT. The type is passed in DATA. */
983
4a8fb1a1
LC
984int
985add_field_for_reduction (reduction_info **slot, tree type)
a509ebb5 986{
b8698a0f 987
4a8fb1a1 988 struct reduction_info *const red = *slot;
12efb1d7 989 tree var = reduc_stmt_res (red->reduc_stmt);
aa06a978
RB
990 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
991 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
0eb7e7aa
RL
992
993 insert_field_into_struct (type, field);
994
995 red->field = field;
996
997 return 1;
998}
a509ebb5 999
5f40b3cb 1000/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 1001 described in SLOT. The type is passed in DATA. */
5f40b3cb 1002
4a8fb1a1
LC
1003int
1004add_field_for_name (name_to_copy_elt **slot, tree type)
5f40b3cb 1005{
4a8fb1a1 1006 struct name_to_copy_elt *const elt = *slot;
5f40b3cb 1007 tree name = ssa_name (elt->version);
70b5e7dc
RG
1008 tree field = build_decl (UNKNOWN_LOCATION,
1009 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1010 TREE_TYPE (name));
5f40b3cb
ZD
1011
1012 insert_field_into_struct (type, field);
1013 elt->field = field;
a509ebb5 1014
5f40b3cb
ZD
1015 return 1;
1016}
1017
b8698a0f
L
1018/* Callback for htab_traverse. A local result is the intermediate result
1019 computed by a single
fa10beec 1020 thread, or the initial value in case no iteration was executed.
b8698a0f
L
1021 This function creates a phi node reflecting these values.
1022 The phi's result will be stored in NEW_PHI field of the
1023 reduction's data structure. */
a509ebb5 1024
4a8fb1a1
LC
1025int
1026create_phi_for_local_result (reduction_info **slot, struct loop *loop)
a509ebb5 1027{
4a8fb1a1 1028 struct reduction_info *const reduc = *slot;
a509ebb5 1029 edge e;
538dd0b7 1030 gphi *new_phi;
e67d7a1e 1031 basic_block store_bb, continue_bb;
a509ebb5 1032 tree local_res;
f5045c96 1033 source_location locus;
a509ebb5 1034
b8698a0f
L
1035 /* STORE_BB is the block where the phi
1036 should be stored. It is the destination of the loop exit.
726a989a 1037 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
e67d7a1e
TV
1038 continue_bb = single_pred (loop->latch);
1039 store_bb = FALLTHRU_EDGE (continue_bb)->dest;
a509ebb5
RL
1040
1041 /* STORE_BB has two predecessors. One coming from the loop
1042 (the reduction's result is computed at the loop),
b8698a0f
L
1043 and another coming from a block preceding the loop,
1044 when no iterations
1045 are executed (the initial value should be taken). */
e67d7a1e 1046 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
a509ebb5
RL
1047 e = EDGE_PRED (store_bb, 1);
1048 else
1049 e = EDGE_PRED (store_bb, 0);
12efb1d7
TV
1050 tree lhs = reduc_stmt_res (reduc->reduc_stmt);
1051 local_res = copy_ssa_name (lhs);
f5045c96 1052 locus = gimple_location (reduc->reduc_stmt);
a509ebb5 1053 new_phi = create_phi_node (local_res, store_bb);
9e227d60 1054 add_phi_arg (new_phi, reduc->init, e, locus);
e67d7a1e 1055 add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
a509ebb5
RL
1056 reduc->new_phi = new_phi;
1057
1058 return 1;
1059}
5f40b3cb
ZD
1060
1061struct clsn_data
1062{
1063 tree store;
1064 tree load;
1065
1066 basic_block store_bb;
1067 basic_block load_bb;
1068};
1069
a509ebb5 1070/* Callback for htab_traverse. Create an atomic instruction for the
b8698a0f 1071 reduction described in SLOT.
a509ebb5
RL
1072 DATA annotates the place in memory the atomic operation relates to,
1073 and the basic block it needs to be generated in. */
1074
4a8fb1a1
LC
1075int
1076create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
a509ebb5 1077{
4a8fb1a1 1078 struct reduction_info *const reduc = *slot;
726a989a 1079 gimple_stmt_iterator gsi;
a509ebb5 1080 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
a509ebb5
RL
1081 tree load_struct;
1082 basic_block bb;
1083 basic_block new_bb;
1084 edge e;
0f900dfa 1085 tree t, addr, ref, x;
726a989a 1086 tree tmp_load, name;
355fe088 1087 gimple *load;
a509ebb5 1088
70f34814 1089 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5 1090 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
a509ebb5
RL
1091
1092 addr = build_addr (t, current_function_decl);
1093
1094 /* Create phi node. */
1095 bb = clsn_data->load_bb;
1096
b13c907a
RB
1097 gsi = gsi_last_bb (bb);
1098 e = split_block (bb, gsi_stmt (gsi));
a509ebb5
RL
1099 new_bb = e->dest;
1100
b731b390
JJ
1101 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1102 tmp_load = make_ssa_name (tmp_load);
726a989a 1103 load = gimple_build_omp_atomic_load (tmp_load, addr);
a509ebb5 1104 SSA_NAME_DEF_STMT (tmp_load) = load;
726a989a
RB
1105 gsi = gsi_start_bb (new_bb);
1106 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
a509ebb5
RL
1107
1108 e = split_block (new_bb, load);
1109 new_bb = e->dest;
726a989a 1110 gsi = gsi_start_bb (new_bb);
a509ebb5 1111 ref = tmp_load;
726a989a
RB
1112 x = fold_build2 (reduc->reduction_code,
1113 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1114 PHI_RESULT (reduc->new_phi));
a509ebb5 1115
726a989a
RB
1116 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1117 GSI_CONTINUE_LINKING);
a509ebb5 1118
726a989a 1119 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
a509ebb5
RL
1120 return 1;
1121}
1122
b8698a0f
L
1123/* Create the atomic operation at the join point of the threads.
1124 REDUCTION_LIST describes the reductions in the LOOP.
1125 LD_ST_DATA describes the shared data structure where
a509ebb5
RL
1126 shared data is stored in and loaded from. */
1127static void
4a8fb1a1 1128create_call_for_reduction (struct loop *loop,
c203e8a7 1129 reduction_info_table_type *reduction_list,
a509ebb5
RL
1130 struct clsn_data *ld_st_data)
1131{
c203e8a7 1132 reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
726a989a 1133 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
e67d7a1e
TV
1134 basic_block continue_bb = single_pred (loop->latch);
1135 ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
4a8fb1a1 1136 reduction_list
c203e8a7 1137 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
a509ebb5
RL
1138}
1139
ae0bce62
RL
1140/* Callback for htab_traverse. Loads the final reduction value at the
1141 join point of all threads, and inserts it in the right place. */
a509ebb5 1142
4a8fb1a1
LC
1143int
1144create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
a509ebb5 1145{
4a8fb1a1 1146 struct reduction_info *const red = *slot;
355fe088 1147 gimple *stmt;
726a989a 1148 gimple_stmt_iterator gsi;
12efb1d7 1149 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
a509ebb5 1150 tree load_struct;
ae0bce62 1151 tree name;
a509ebb5
RL
1152 tree x;
1153
79855460
TV
1154 /* If there's no exit phi, the result of the reduction is unused. */
1155 if (red->keep_res == NULL)
1156 return 1;
1157
726a989a 1158 gsi = gsi_after_labels (clsn_data->load_bb);
70f34814 1159 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5
RL
1160 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1161 NULL_TREE);
a509ebb5 1162
ae0bce62 1163 x = load_struct;
a509ebb5 1164 name = PHI_RESULT (red->keep_res);
726a989a 1165 stmt = gimple_build_assign (name, x);
a509ebb5 1166
726a989a 1167 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1168
726a989a
RB
1169 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1170 !gsi_end_p (gsi); gsi_next (&gsi))
1171 if (gsi_stmt (gsi) == red->keep_res)
1172 {
1173 remove_phi_node (&gsi, false);
1174 return 1;
1175 }
1176 gcc_unreachable ();
a509ebb5
RL
1177}
1178
b8698a0f 1179/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1180 REDUCTION_LIST describes the list of reductions that the
fa10beec 1181 loads should be generated for. */
a509ebb5 1182static void
c203e8a7 1183create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
a509ebb5
RL
1184 struct clsn_data *ld_st_data)
1185{
726a989a 1186 gimple_stmt_iterator gsi;
a509ebb5 1187 tree t;
355fe088 1188 gimple *stmt;
a509ebb5 1189
726a989a 1190 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1191 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1192 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1193
726a989a 1194 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1195
4a8fb1a1 1196 reduction_list
c203e8a7 1197 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
a509ebb5
RL
1198
1199}
1200
0eb7e7aa
RL
1201/* Callback for htab_traverse. Store the neutral value for the
1202 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1203 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1204 The reduction is specified in SLOT. The store information is
1205 passed in DATA. */
0eb7e7aa 1206
4a8fb1a1
LC
1207int
1208create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
0eb7e7aa 1209{
4a8fb1a1 1210 struct reduction_info *const red = *slot;
726a989a 1211 tree t;
355fe088 1212 gimple *stmt;
726a989a 1213 gimple_stmt_iterator gsi;
12efb1d7 1214 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
726a989a
RB
1215
1216 gsi = gsi_last_bb (clsn_data->store_bb);
1217 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1218 stmt = gimple_build_assign (t, red->initial_value);
726a989a 1219 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1220
1221 return 1;
1222}
1223
a509ebb5
RL
1224/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1225 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1226 specified in SLOT. */
1227
4a8fb1a1
LC
1228int
1229create_loads_and_stores_for_name (name_to_copy_elt **slot,
1230 struct clsn_data *clsn_data)
5f40b3cb 1231{
4a8fb1a1 1232 struct name_to_copy_elt *const elt = *slot;
726a989a 1233 tree t;
355fe088 1234 gimple *stmt;
726a989a 1235 gimple_stmt_iterator gsi;
5f40b3cb 1236 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1237 tree load_struct;
1238
726a989a
RB
1239 gsi = gsi_last_bb (clsn_data->store_bb);
1240 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1241 stmt = gimple_build_assign (t, ssa_name (elt->version));
726a989a 1242 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1243
726a989a 1244 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1245 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1246 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1247 stmt = gimple_build_assign (elt->new_name, t);
726a989a 1248 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1249
1250 return 1;
1251}
1252
1253/* Moves all the variables used in LOOP and defined outside of it (including
1254 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1255 name) to a structure created for this purpose. The code
b8698a0f 1256
5f40b3cb
ZD
1257 while (1)
1258 {
1259 use (a);
1260 use (b);
1261 }
1262
1263 is transformed this way:
1264
1265 bb0:
1266 old.a = a;
1267 old.b = b;
1268
1269 bb1:
1270 a' = new->a;
1271 b' = new->b;
1272 while (1)
1273 {
1274 use (a');
1275 use (b');
1276 }
1277
1278 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1279 pointer `new' is intentionally not initialized (the loop will be split to a
1280 separate function later, and `new' will be initialized from its arguments).
a509ebb5 1281 LD_ST_DATA holds information about the shared data structure used to pass
b8698a0f
L
1282 information among the threads. It is initialized here, and
1283 gen_parallel_loop will pass it to create_call_for_reduction that
1284 needs this information. REDUCTION_LIST describes the reductions
a509ebb5 1285 in LOOP. */
5f40b3cb
ZD
1286
1287static void
4a8fb1a1 1288separate_decls_in_region (edge entry, edge exit,
c203e8a7 1289 reduction_info_table_type *reduction_list,
b8698a0f 1290 tree *arg_struct, tree *new_arg_struct,
9f9f72aa 1291 struct clsn_data *ld_st_data)
a509ebb5 1292
5f40b3cb 1293{
9f9f72aa 1294 basic_block bb1 = split_edge (entry);
5f40b3cb 1295 basic_block bb0 = single_pred (bb1);
c203e8a7
TS
1296 name_to_copy_table_type name_copies (10);
1297 int_tree_htab_type decl_copies (10);
5f40b3cb 1298 unsigned i;
726a989a
RB
1299 tree type, type_name, nvar;
1300 gimple_stmt_iterator gsi;
5f40b3cb 1301 struct clsn_data clsn_data;
00f96dc9 1302 auto_vec<basic_block, 3> body;
9f9f72aa
AP
1303 basic_block bb;
1304 basic_block entry_bb = bb1;
1305 basic_block exit_bb = exit->dest;
b5b8b0ac 1306 bool has_debug_stmt = false;
5f40b3cb 1307
726a989a 1308 entry = single_succ_edge (entry_bb);
9f9f72aa 1309 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 1310
9771b263 1311 FOR_EACH_VEC_ELT (body, i, bb)
9f9f72aa 1312 {
b8698a0f 1313 if (bb != entry_bb && bb != exit_bb)
9f9f72aa 1314 {
726a989a
RB
1315 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1316 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
c203e8a7 1317 &name_copies, &decl_copies);
726a989a
RB
1318
1319 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
b5b8b0ac 1320 {
355fe088 1321 gimple *stmt = gsi_stmt (gsi);
b5b8b0ac
AO
1322
1323 if (is_gimple_debug (stmt))
1324 has_debug_stmt = true;
1325 else
1326 separate_decls_in_region_stmt (entry, exit, stmt,
c203e8a7 1327 &name_copies, &decl_copies);
b5b8b0ac 1328 }
9f9f72aa 1329 }
5f40b3cb 1330 }
9f9f72aa 1331
b5b8b0ac
AO
1332 /* Now process debug bind stmts. We must not create decls while
1333 processing debug stmts, so we defer their processing so as to
1334 make sure we will have debug info for as many variables as
1335 possible (all of those that were dealt with in the loop above),
1336 and discard those for which we know there's nothing we can
1337 do. */
1338 if (has_debug_stmt)
9771b263 1339 FOR_EACH_VEC_ELT (body, i, bb)
b5b8b0ac
AO
1340 if (bb != entry_bb && bb != exit_bb)
1341 {
1342 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1343 {
355fe088 1344 gimple *stmt = gsi_stmt (gsi);
b5b8b0ac 1345
ddb555ed 1346 if (is_gimple_debug (stmt))
b5b8b0ac 1347 {
c203e8a7
TS
1348 if (separate_decls_in_region_debug (stmt, &name_copies,
1349 &decl_copies))
b5b8b0ac
AO
1350 {
1351 gsi_remove (&gsi, true);
1352 continue;
1353 }
1354 }
1355
1356 gsi_next (&gsi);
1357 }
1358 }
1359
c203e8a7 1360 if (name_copies.elements () == 0 && reduction_list->elements () == 0)
5f40b3cb
ZD
1361 {
1362 /* It may happen that there is nothing to copy (if there are only
a509ebb5 1363 loop carried and external variables in the loop). */
5f40b3cb
ZD
1364 *arg_struct = NULL;
1365 *new_arg_struct = NULL;
1366 }
1367 else
1368 {
1369 /* Create the type for the structure to store the ssa names to. */
1370 type = lang_hooks.types.make_type (RECORD_TYPE);
9ff70652 1371 type_name = build_decl (UNKNOWN_LOCATION,
c2255bc4 1372 TYPE_DECL, create_tmp_var_name (".paral_data"),
5f40b3cb
ZD
1373 type);
1374 TYPE_NAME (type) = type_name;
1375
4a8fb1a1 1376 name_copies.traverse <tree, add_field_for_name> (type);
c203e8a7 1377 if (reduction_list && reduction_list->elements () > 0)
0eb7e7aa
RL
1378 {
1379 /* Create the fields for reductions. */
c203e8a7 1380 reduction_list->traverse <tree, add_field_for_reduction> (type);
0eb7e7aa 1381 }
5f40b3cb 1382 layout_type (type);
b8698a0f 1383
5f40b3cb
ZD
1384 /* Create the loads and stores. */
1385 *arg_struct = create_tmp_var (type, ".paral_data_store");
5f40b3cb 1386 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
b731b390 1387 *new_arg_struct = make_ssa_name (nvar);
5f40b3cb 1388
a509ebb5
RL
1389 ld_st_data->store = *arg_struct;
1390 ld_st_data->load = *new_arg_struct;
1391 ld_st_data->store_bb = bb0;
1392 ld_st_data->load_bb = bb1;
0eb7e7aa 1393
4a8fb1a1
LC
1394 name_copies
1395 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1396 (ld_st_data);
a509ebb5 1397
ae0bce62
RL
1398 /* Load the calculation from memory (after the join of the threads). */
1399
c203e8a7 1400 if (reduction_list && reduction_list->elements () > 0)
a509ebb5 1401 {
4a8fb1a1 1402 reduction_list
c203e8a7
TS
1403 ->traverse <struct clsn_data *, create_stores_for_reduction>
1404 (ld_st_data);
b731b390 1405 clsn_data.load = make_ssa_name (nvar);
9f9f72aa 1406 clsn_data.load_bb = exit->dest;
a509ebb5
RL
1407 clsn_data.store = ld_st_data->store;
1408 create_final_loads_for_reduction (reduction_list, &clsn_data);
1409 }
5f40b3cb 1410 }
5f40b3cb
ZD
1411}
1412
a79b7ec5 1413/* Returns true if FN was created to run in parallel. */
5f40b3cb 1414
62e0a1ed 1415bool
a79b7ec5 1416parallelized_function_p (tree fndecl)
5f40b3cb 1417{
a79b7ec5
TV
1418 cgraph_node *node = cgraph_node::get (fndecl);
1419 gcc_assert (node != NULL);
1420 return node->parallelized_function;
5f40b3cb
ZD
1421}
1422
1423/* Creates and returns an empty function that will receive the body of
1424 a parallelized loop. */
1425
1426static tree
9ff70652 1427create_loop_fn (location_t loc)
5f40b3cb
ZD
1428{
1429 char buf[100];
1430 char *tname;
1431 tree decl, type, name, t;
1432 struct function *act_cfun = cfun;
1433 static unsigned loopfn_num;
1434
5368224f 1435 loc = LOCATION_LOCUS (loc);
5f40b3cb
ZD
1436 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1437 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1438 clean_symbol_name (tname);
1439 name = get_identifier (tname);
1440 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1441
9ff70652 1442 decl = build_decl (loc, FUNCTION_DECL, name, type);
5f40b3cb
ZD
1443 TREE_STATIC (decl) = 1;
1444 TREE_USED (decl) = 1;
1445 DECL_ARTIFICIAL (decl) = 1;
1446 DECL_IGNORED_P (decl) = 0;
1447 TREE_PUBLIC (decl) = 0;
1448 DECL_UNINLINABLE (decl) = 1;
1449 DECL_EXTERNAL (decl) = 0;
1450 DECL_CONTEXT (decl) = NULL_TREE;
1451 DECL_INITIAL (decl) = make_node (BLOCK);
1452
9ff70652 1453 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
5f40b3cb
ZD
1454 DECL_ARTIFICIAL (t) = 1;
1455 DECL_IGNORED_P (t) = 1;
1456 DECL_RESULT (decl) = t;
1457
9ff70652 1458 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
5f40b3cb
ZD
1459 ptr_type_node);
1460 DECL_ARTIFICIAL (t) = 1;
1461 DECL_ARG_TYPE (t) = ptr_type_node;
1462 DECL_CONTEXT (t) = decl;
1463 TREE_USED (t) = 1;
1464 DECL_ARGUMENTS (decl) = t;
1465
182e0d71 1466 allocate_struct_function (decl, false);
5f40b3cb
ZD
1467
1468 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1469 it. */
5576d6f2 1470 set_cfun (act_cfun);
5f40b3cb
ZD
1471
1472 return decl;
1473}
1474
7c82d827
TV
1475/* Replace uses of NAME by VAL in block BB. */
1476
1477static void
1478replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1479{
355fe088 1480 gimple *use_stmt;
7c82d827
TV
1481 imm_use_iterator imm_iter;
1482
1483 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1484 {
1485 if (gimple_bb (use_stmt) != bb)
1486 continue;
1487
1488 use_operand_p use_p;
1489 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1490 SET_USE (use_p, val);
1491 }
1492}
1493
7c82d827
TV
1494/* Do transformation from:
1495
1496 <bb preheader>:
1497 ...
1498 goto <bb header>
1499
1500 <bb header>:
1501 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1502 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1503 ...
1504 use (ivtmp_a)
1505 ...
1506 sum_b = sum_a + sum_update
1507 ...
1508 if (ivtmp_a < n)
1509 goto <bb latch>;
1510 else
1511 goto <bb exit>;
1512
1513 <bb latch>:
1514 ivtmp_b = ivtmp_a + 1;
1515 goto <bb header>
1516
1517 <bb exit>:
712cb0bb 1518 sum_z = PHI <sum_b (cond[1]), ...>
7c82d827
TV
1519
1520 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1521 that's <bb header>.
1522
1523 to:
1524
1525 <bb preheader>:
1526 ...
1527 goto <bb newheader>
1528
1529 <bb header>:
1530 ivtmp_a = PHI <ivtmp_c (latch)>
1531 sum_a = PHI <sum_c (latch)>
1532 ...
1533 use (ivtmp_a)
1534 ...
1535 sum_b = sum_a + sum_update
1536 ...
1537 goto <bb latch>;
1538
1539 <bb newheader>:
1540 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1541 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1542 if (ivtmp_c < n + 1)
1543 goto <bb header>;
1544 else
712cb0bb 1545 goto <bb newexit>;
7c82d827
TV
1546
1547 <bb latch>:
1548 ivtmp_b = ivtmp_a + 1;
1549 goto <bb newheader>
1550
712cb0bb
TV
1551 <bb newexit>:
1552 sum_y = PHI <sum_c (newheader)>
1553
7c82d827 1554 <bb exit>:
712cb0bb 1555 sum_z = PHI <sum_y (newexit), ...>
7c82d827
TV
1556
1557
1558 In unified diff format:
1559
1560 <bb preheader>:
1561 ...
1562- goto <bb header>
1563+ goto <bb newheader>
1564
1565 <bb header>:
1566- ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1567- sum_a = PHI <sum_init (preheader), sum_b (latch)>
1568+ ivtmp_a = PHI <ivtmp_c (latch)>
1569+ sum_a = PHI <sum_c (latch)>
1570 ...
1571 use (ivtmp_a)
1572 ...
1573 sum_b = sum_a + sum_update
1574 ...
1575- if (ivtmp_a < n)
1576- goto <bb latch>;
1577+ goto <bb latch>;
1578+
1579+ <bb newheader>:
1580+ ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1581+ sum_c = PHI <sum_init (preheader), sum_b (latch)>
1582+ if (ivtmp_c < n + 1)
1583+ goto <bb header>;
1584 else
1585 goto <bb exit>;
1586
1587 <bb latch>:
1588 ivtmp_b = ivtmp_a + 1;
1589- goto <bb header>
1590+ goto <bb newheader>
1591
712cb0bb
TV
1592+ <bb newexit>:
1593+ sum_y = PHI <sum_c (newheader)>
1594
7c82d827 1595 <bb exit>:
712cb0bb
TV
1596- sum_z = PHI <sum_b (cond[1]), ...>
1597+ sum_z = PHI <sum_y (newexit), ...>
7c82d827
TV
1598
1599 Note: the example does not show any virtual phis, but these are handled more
1600 or less as reductions.
b8698a0f 1601
7c82d827
TV
1602
1603 Moves the exit condition of LOOP to the beginning of its header.
1604 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1605 bound. */
1606
1607static void
1608transform_to_exit_first_loop_alt (struct loop *loop,
1609 reduction_info_table_type *reduction_list,
1610 tree bound)
1611{
1612 basic_block header = loop->header;
1613 basic_block latch = loop->latch;
1614 edge exit = single_dom_exit (loop);
1615 basic_block exit_block = exit->dest;
1616 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1617 tree control = gimple_cond_lhs (cond_stmt);
1618 edge e;
1619
338392ed
TV
1620 /* Rewriting virtuals into loop-closed ssa normal form makes this
1621 transformation simpler. It also ensures that the virtuals are in
1622 loop-closed ssa normal from after the transformation, which is required by
1623 create_parallel_loop. */
1624 rewrite_virtuals_into_loop_closed_ssa (loop);
7c82d827
TV
1625
1626 /* Create the new_header block. */
1627 basic_block new_header = split_block_before_cond_jump (exit->src);
712cb0bb 1628 edge edge_at_split = single_pred_edge (new_header);
7c82d827
TV
1629
1630 /* Redirect entry edge to new_header. */
1631 edge entry = loop_preheader_edge (loop);
1632 e = redirect_edge_and_branch (entry, new_header);
1633 gcc_assert (e == entry);
1634
1635 /* Redirect post_inc_edge to new_header. */
1636 edge post_inc_edge = single_succ_edge (latch);
1637 e = redirect_edge_and_branch (post_inc_edge, new_header);
1638 gcc_assert (e == post_inc_edge);
1639
1640 /* Redirect post_cond_edge to header. */
1641 edge post_cond_edge = single_pred_edge (latch);
1642 e = redirect_edge_and_branch (post_cond_edge, header);
1643 gcc_assert (e == post_cond_edge);
1644
712cb0bb
TV
1645 /* Redirect edge_at_split to latch. */
1646 e = redirect_edge_and_branch (edge_at_split, latch);
1647 gcc_assert (e == edge_at_split);
7c82d827
TV
1648
1649 /* Set the new loop bound. */
1650 gimple_cond_set_rhs (cond_stmt, bound);
5a5fd951 1651 update_stmt (cond_stmt);
7c82d827
TV
1652
1653 /* Repair the ssa. */
1654 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1655 edge_var_map *vm;
1656 gphi_iterator gsi;
338392ed 1657 int i;
7c82d827
TV
1658 for (gsi = gsi_start_phis (header), i = 0;
1659 !gsi_end_p (gsi) && v->iterate (i, &vm);
1660 gsi_next (&gsi), i++)
1661 {
1662 gphi *phi = gsi.phi ();
1663 tree res_a = PHI_RESULT (phi);
1664
1665 /* Create new phi. */
1666 tree res_c = copy_ssa_name (res_a, phi);
1667 gphi *nphi = create_phi_node (res_c, new_header);
1668
1669 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1670 replace_uses_in_bb_by (res_a, res_c, new_header);
1671
1672 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1673 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1674
338392ed 1675 /* Replace sum_b with sum_c in exit phi. */
7c82d827 1676 tree res_b = redirect_edge_var_map_def (vm);
338392ed 1677 replace_uses_in_bb_by (res_b, res_c, exit_block);
7c82d827
TV
1678
1679 struct reduction_info *red = reduction_phi (reduction_list, phi);
1680 gcc_assert (virtual_operand_p (res_a)
1681 || res_a == control
1682 || red != NULL);
1683
1684 if (red)
1685 {
1686 /* Register the new reduction phi. */
1687 red->reduc_phi = nphi;
1688 gimple_set_uid (red->reduc_phi, red->reduc_version);
1689 }
1690 }
1691 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
7c82d827
TV
1692
1693 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1694 flush_pending_stmts (entry);
1695
1696 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1697 flush_pending_stmts (post_inc_edge);
1698
712cb0bb
TV
1699 /* Create a new empty exit block, inbetween the new loop header and the old
1700 exit block. The function separate_decls_in_region needs this block to
1701 insert code that is active on loop exit, but not any other path. */
1702 basic_block new_exit_block = split_edge (exit);
1703
1704 /* Insert and register the reduction exit phis. */
7c82d827
TV
1705 for (gphi_iterator gsi = gsi_start_phis (exit_block);
1706 !gsi_end_p (gsi);
1707 gsi_next (&gsi))
1708 {
1709 gphi *phi = gsi.phi ();
1710 tree res_z = PHI_RESULT (phi);
712cb0bb
TV
1711
1712 /* Now that we have a new exit block, duplicate the phi of the old exit
1713 block in the new exit block to preserve loop-closed ssa. */
1714 edge succ_new_exit_block = single_succ_edge (new_exit_block);
1715 edge pred_new_exit_block = single_pred_edge (new_exit_block);
1716 tree res_y = copy_ssa_name (res_z, phi);
1717 gphi *nphi = create_phi_node (res_y, new_exit_block);
1718 tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1719 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1720 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1721
7c82d827
TV
1722 if (virtual_operand_p (res_z))
1723 continue;
1724
355fe088 1725 gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
7c82d827
TV
1726 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1727 if (red != NULL)
712cb0bb 1728 red->keep_res = nphi;
7c82d827
TV
1729 }
1730
1731 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1732 then we're still using some fields, so only bother about fields that are
1733 still used: header and latch.
1734 The loop has a new header bb, so we update it. The latch bb stays the
1735 same. */
1736 loop->header = new_header;
1737
1738 /* Recalculate dominance info. */
1739 free_dominance_info (CDI_DOMINATORS);
1740 calculate_dominance_info (CDI_DOMINATORS);
1741}
1742
1743/* Tries to moves the exit condition of LOOP to the beginning of its header
1744 without duplication of the loop body. NIT is the number of iterations of the
1745 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1746 transformation is successful. */
1747
1748static bool
1749try_transform_to_exit_first_loop_alt (struct loop *loop,
1750 reduction_info_table_type *reduction_list,
1751 tree nit)
1752{
1753 /* Check whether the latch contains a single statement. */
1b7f61eb
TV
1754 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1755 return false;
7c82d827
TV
1756
1757 /* Check whether the latch contains the loop iv increment. */
1758 edge back = single_succ_edge (loop->latch);
1759 edge exit = single_dom_exit (loop);
1760 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1761 tree control = gimple_cond_lhs (cond_stmt);
1762 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1763 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1764 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1765 return false;
1766
1767 /* Check whether there's no code between the loop condition and the latch. */
1768 if (!single_pred_p (loop->latch)
1769 || single_pred (loop->latch) != exit->src)
1770 return false;
1771
1772 tree alt_bound = NULL_TREE;
1773 tree nit_type = TREE_TYPE (nit);
1774
1775 /* Figure out whether nit + 1 overflows. */
1776 if (TREE_CODE (nit) == INTEGER_CST)
1777 {
1778 if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
1779 {
1780 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1781 nit, build_one_cst (nit_type));
1782
1783 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
fd7b3ef5
TV
1784 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1785 return true;
7c82d827
TV
1786 }
1787 else
1788 {
1789 /* Todo: Figure out if we can trigger this, if it's worth to handle
1790 optimally, and if we can handle it optimally. */
fd7b3ef5 1791 return false;
7c82d827
TV
1792 }
1793 }
7c82d827 1794
fd7b3ef5 1795 gcc_assert (TREE_CODE (nit) == SSA_NAME);
7c82d827 1796
4f75d608
TV
1797 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1798 iv with base 0 and step 1 that is incremented in the latch, like this:
1799
1800 <bb header>:
1801 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1802 ...
1803 if (iv_1 < nit)
1804 goto <bb latch>;
1805 else
1806 goto <bb exit>;
1807
1808 <bb latch>:
1809 iv_2 = iv_1 + 1;
1810 goto <bb header>;
1811
1812 The range of iv_1 is [0, nit]. The latch edge is taken for
1813 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
1814 number of latch executions is equal to nit.
1815
1816 The function max_loop_iterations gives us the maximum number of latch
1817 executions, so it gives us the maximum value of nit. */
1818 widest_int nit_max;
1819 if (!max_loop_iterations (loop, &nit_max))
1820 return false;
1821
1822 /* Check if nit + 1 overflows. */
1823 widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
1824 if (!wi::lts_p (nit_max, type_max))
1825 return false;
1826
355fe088 1827 gimple *def = SSA_NAME_DEF_STMT (nit);
7c82d827 1828
4f75d608 1829 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
fd7b3ef5
TV
1830 if (def
1831 && is_gimple_assign (def)
1832 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1833 {
1834 tree op1 = gimple_assign_rhs1 (def);
1835 tree op2 = gimple_assign_rhs2 (def);
1836 if (integer_minus_onep (op1))
1837 alt_bound = op2;
1838 else if (integer_minus_onep (op2))
1839 alt_bound = op1;
7c82d827
TV
1840 }
1841
9f620bf1 1842 /* If not found, insert nit + 1. */
7c82d827 1843 if (alt_bound == NULL_TREE)
9f620bf1
TV
1844 {
1845 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
1846 build_int_cst_type (nit_type, 1));
1847
1848 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1849
1850 alt_bound
1851 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
1852 GSI_CONTINUE_LINKING);
1853 }
7c82d827
TV
1854
1855 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1856 return true;
1857}
1858
1859/* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1860 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1861 LOOP. */
5f40b3cb
ZD
1862
1863static void
4a8fb1a1 1864transform_to_exit_first_loop (struct loop *loop,
c203e8a7 1865 reduction_info_table_type *reduction_list,
4a8fb1a1 1866 tree nit)
5f40b3cb
ZD
1867{
1868 basic_block *bbs, *nbbs, ex_bb, orig_header;
1869 unsigned n;
1870 bool ok;
1871 edge exit = single_dom_exit (loop), hpred;
726a989a 1872 tree control, control_name, res, t;
538dd0b7
DM
1873 gphi *phi, *nphi;
1874 gassign *stmt;
1875 gcond *cond_stmt, *cond_nit;
48710229 1876 tree nit_1;
5f40b3cb
ZD
1877
1878 split_block_after_labels (loop->header);
1879 orig_header = single_succ (loop->header);
1880 hpred = single_succ_edge (loop->header);
1881
538dd0b7 1882 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
726a989a
RB
1883 control = gimple_cond_lhs (cond_stmt);
1884 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
5f40b3cb
ZD
1885
1886 /* Make sure that we have phi nodes on exit for all loop header phis
1887 (create_parallel_loop requires that). */
538dd0b7
DM
1888 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1889 !gsi_end_p (gsi);
1890 gsi_next (&gsi))
5f40b3cb 1891 {
538dd0b7 1892 phi = gsi.phi ();
5f40b3cb 1893 res = PHI_RESULT (phi);
070ecdfd 1894 t = copy_ssa_name (res, phi);
5f40b3cb 1895 SET_PHI_RESULT (phi, t);
5f40b3cb 1896 nphi = create_phi_node (res, orig_header);
9e227d60 1897 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
5f40b3cb
ZD
1898
1899 if (res == control)
1900 {
726a989a 1901 gimple_cond_set_lhs (cond_stmt, t);
5f40b3cb
ZD
1902 update_stmt (cond_stmt);
1903 control = t;
1904 }
1905 }
12037899 1906
5f40b3cb 1907 bbs = get_loop_body_in_dom_order (loop);
48710229 1908
69958396
RL
1909 for (n = 0; bbs[n] != exit->src; n++)
1910 continue;
5f40b3cb 1911 nbbs = XNEWVEC (basic_block, n);
726a989a
RB
1912 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1913 bbs + 1, n, nbbs);
5f40b3cb
ZD
1914 gcc_assert (ok);
1915 free (bbs);
1916 ex_bb = nbbs[0];
1917 free (nbbs);
1918
b8698a0f 1919 /* Other than reductions, the only gimple reg that should be copied
726a989a 1920 out of the loop is the control variable. */
69958396 1921 exit = single_dom_exit (loop);
5f40b3cb 1922 control_name = NULL_TREE;
538dd0b7
DM
1923 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1924 !gsi_end_p (gsi); )
5f40b3cb 1925 {
538dd0b7 1926 phi = gsi.phi ();
5f40b3cb 1927 res = PHI_RESULT (phi);
ea057359 1928 if (virtual_operand_p (res))
726a989a
RB
1929 {
1930 gsi_next (&gsi);
1931 continue;
1932 }
5f40b3cb 1933
a509ebb5 1934 /* Check if it is a part of reduction. If it is,
b8698a0f
L
1935 keep the phi at the reduction's keep_res field. The
1936 PHI_RESULT of this phi is the resulting value of the reduction
a509ebb5
RL
1937 variable when exiting the loop. */
1938
c203e8a7 1939 if (reduction_list->elements () > 0)
a509ebb5
RL
1940 {
1941 struct reduction_info *red;
1942
1943 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
a509ebb5
RL
1944 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1945 if (red)
726a989a
RB
1946 {
1947 red->keep_res = phi;
1948 gsi_next (&gsi);
1949 continue;
1950 }
a509ebb5 1951 }
726a989a
RB
1952 gcc_assert (control_name == NULL_TREE
1953 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb 1954 control_name = res;
726a989a 1955 remove_phi_node (&gsi, false);
5f40b3cb
ZD
1956 }
1957 gcc_assert (control_name != NULL_TREE);
5f40b3cb 1958
b8698a0f 1959 /* Initialize the control variable to number of iterations
48710229 1960 according to the rhs of the exit condition. */
538dd0b7
DM
1961 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1962 cond_nit = as_a <gcond *> (last_stmt (exit->src));
48710229
RL
1963 nit_1 = gimple_cond_rhs (cond_nit);
1964 nit_1 = force_gimple_operand_gsi (&gsi,
1965 fold_convert (TREE_TYPE (control_name), nit_1),
726a989a 1966 false, NULL_TREE, false, GSI_SAME_STMT);
48710229 1967 stmt = gimple_build_assign (control_name, nit_1);
726a989a 1968 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1969}
1970
1971/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
726a989a 1972 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
5f40b3cb
ZD
1973 NEW_DATA is the variable that should be initialized from the argument
1974 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
726a989a 1975 basic block containing GIMPLE_OMP_PARALLEL tree. */
5f40b3cb
ZD
1976
1977static basic_block
1978create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
9ff70652 1979 tree new_data, unsigned n_threads, location_t loc)
5f40b3cb 1980{
726a989a 1981 gimple_stmt_iterator gsi;
e67d7a1e 1982 basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
0f900dfa 1983 tree t, param;
538dd0b7 1984 gomp_parallel *omp_par_stmt;
355fe088
TS
1985 gimple *omp_return_stmt1, *omp_return_stmt2;
1986 gimple *phi;
538dd0b7
DM
1987 gcond *cond_stmt;
1988 gomp_for *for_stmt;
1989 gomp_continue *omp_cont_stmt;
726a989a 1990 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
5f40b3cb
ZD
1991 edge exit, nexit, guard, end, e;
1992
726a989a 1993 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
5f40b3cb
ZD
1994 bb = loop_preheader_edge (loop)->src;
1995 paral_bb = single_pred (bb);
726a989a 1996 gsi = gsi_last_bb (paral_bb);
5f40b3cb 1997
9ff70652 1998 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
5f40b3cb 1999 OMP_CLAUSE_NUM_THREADS_EXPR (t)
a509ebb5 2000 = build_int_cst (integer_type_node, n_threads);
538dd0b7
DM
2001 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
2002 gimple_set_location (omp_par_stmt, loc);
5f40b3cb 2003
538dd0b7 2004 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
2005
2006 /* Initialize NEW_DATA. */
2007 if (data)
2008 {
538dd0b7
DM
2009 gassign *assign_stmt;
2010
726a989a
RB
2011 gsi = gsi_after_labels (bb);
2012
b731b390 2013 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
538dd0b7
DM
2014 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2015 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
726a989a 2016
538dd0b7 2017 assign_stmt = gimple_build_assign (new_data,
726a989a 2018 fold_convert (TREE_TYPE (new_data), param));
538dd0b7 2019 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5f40b3cb
ZD
2020 }
2021
726a989a 2022 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
5f40b3cb 2023 bb = split_loop_exit_edge (single_dom_exit (loop));
726a989a 2024 gsi = gsi_last_bb (bb);
538dd0b7
DM
2025 omp_return_stmt1 = gimple_build_omp_return (false);
2026 gimple_set_location (omp_return_stmt1, loc);
2027 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
5f40b3cb 2028
726a989a 2029 /* Extract data for GIMPLE_OMP_FOR. */
5f40b3cb 2030 gcc_assert (loop->header == single_dom_exit (loop)->src);
538dd0b7 2031 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
5f40b3cb 2032
726a989a 2033 cvar = gimple_cond_lhs (cond_stmt);
5f40b3cb
ZD
2034 cvar_base = SSA_NAME_VAR (cvar);
2035 phi = SSA_NAME_DEF_STMT (cvar);
2036 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
b731b390 2037 initvar = copy_ssa_name (cvar);
5f40b3cb
ZD
2038 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2039 initvar);
2040 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2041
1dff453d 2042 gsi = gsi_last_nondebug_bb (loop->latch);
726a989a
RB
2043 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2044 gsi_remove (&gsi, true);
5f40b3cb
ZD
2045
2046 /* Prepare cfg. */
2047 for_bb = split_edge (loop_preheader_edge (loop));
2048 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2049 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2050 gcc_assert (exit == single_dom_exit (loop));
2051
2052 guard = make_edge (for_bb, ex_bb, 0);
e67d7a1e
TV
2053 /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid. */
2054 loop->latch = split_edge (single_succ_edge (loop->latch));
2055 single_pred_edge (loop->latch)->flags = 0;
2056 end = make_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
2057 rescan_loop_exit (end, true, false);
2058
538dd0b7
DM
2059 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2060 !gsi_end_p (gpi); gsi_next (&gpi))
5f40b3cb 2061 {
f5045c96 2062 source_location locus;
538dd0b7 2063 gphi *phi = gpi.phi ();
7781d262 2064 tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
355fe088 2065 gimple *def_stmt = SSA_NAME_DEF_STMT (def);
538dd0b7 2066
7781d262
TV
2067 /* If the exit phi is not connected to a header phi in the same loop, this
2068 value is not modified in the loop, and we're done with this phi. */
2069 if (!(gimple_code (def_stmt) == GIMPLE_PHI
2070 && gimple_bb (def_stmt) == loop->header))
2071 continue;
f5045c96 2072
7781d262 2073 gphi *stmt = as_a <gphi *> (def_stmt);
f5045c96 2074 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
b8698a0f 2075 locus = gimple_phi_arg_location_from_edge (stmt,
f5045c96 2076 loop_preheader_edge (loop));
9e227d60 2077 add_phi_arg (phi, def, guard, locus);
f5045c96
AM
2078
2079 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2080 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
9e227d60 2081 add_phi_arg (phi, def, end, locus);
5f40b3cb
ZD
2082 }
2083 e = redirect_edge_and_branch (exit, nexit->dest);
2084 PENDING_STMT (e) = NULL;
2085
726a989a
RB
2086 /* Emit GIMPLE_OMP_FOR. */
2087 gimple_cond_set_lhs (cond_stmt, cvar_base);
5f40b3cb 2088 type = TREE_TYPE (cvar);
9ff70652 2089 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
f7f18684 2090 int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
1f600fea
TV
2091 enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type \
2092 = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
2093 switch (schedule_type)
2094 {
2095 case PARAM_PARLOOPS_SCHEDULE_KIND_static:
2096 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2097 break;
2098 case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
2099 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
2100 break;
2101 case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
2102 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
2103 break;
2104 case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
2105 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
2106 chunk_size = 0;
2107 break;
2108 case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
2109 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
2110 chunk_size = 0;
2111 break;
2112 default:
2113 gcc_unreachable ();
2114 }
f7f18684
TV
2115 if (chunk_size != 0)
2116 OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
2117 = build_int_cst (integer_type_node, chunk_size);
5f40b3cb 2118
74bf76ed 2119 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
9ff70652 2120 gimple_set_location (for_stmt, loc);
726a989a
RB
2121 gimple_omp_for_set_index (for_stmt, 0, initvar);
2122 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2123 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2124 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2125 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2126 cvar_base,
2127 build_int_cst (type, 1)));
2128
2129 gsi = gsi_last_bb (for_bb);
2130 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
2131 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2132
726a989a 2133 /* Emit GIMPLE_OMP_CONTINUE. */
e67d7a1e
TV
2134 continue_bb = single_pred (loop->latch);
2135 gsi = gsi_last_bb (continue_bb);
538dd0b7
DM
2136 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2137 gimple_set_location (omp_cont_stmt, loc);
2138 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2139 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
5f40b3cb 2140
726a989a
RB
2141 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2142 gsi = gsi_last_bb (ex_bb);
538dd0b7
DM
2143 omp_return_stmt2 = gimple_build_omp_return (true);
2144 gimple_set_location (omp_return_stmt2, loc);
2145 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
5f40b3cb 2146
cd7d9fd7
RG
2147 /* After the above dom info is hosed. Re-compute it. */
2148 free_dominance_info (CDI_DOMINATORS);
2149 calculate_dominance_info (CDI_DOMINATORS);
2150
5f40b3cb
ZD
2151 return paral_bb;
2152}
2153
08dab97a
RL
2154/* Generates code to execute the iterations of LOOP in N_THREADS
2155 threads in parallel.
2156
2157 NITER describes number of iterations of LOOP.
fa10beec 2158 REDUCTION_LIST describes the reductions existent in the LOOP. */
5f40b3cb
ZD
2159
2160static void
c203e8a7
TS
2161gen_parallel_loop (struct loop *loop,
2162 reduction_info_table_type *reduction_list,
a509ebb5 2163 unsigned n_threads, struct tree_niter_desc *niter)
5f40b3cb 2164{
5f40b3cb 2165 tree many_iterations_cond, type, nit;
726a989a
RB
2166 tree arg_struct, new_arg_struct;
2167 gimple_seq stmts;
9f9f72aa 2168 edge entry, exit;
a509ebb5 2169 struct clsn_data clsn_data;
5f40b3cb 2170 unsigned prob;
9ff70652 2171 location_t loc;
355fe088 2172 gimple *cond_stmt;
768da0da 2173 unsigned int m_p_thread=2;
5f40b3cb
ZD
2174
2175 /* From
2176
2177 ---------------------------------------------------------------------
2178 loop
2179 {
2180 IV = phi (INIT, IV + STEP)
2181 BODY1;
2182 if (COND)
2183 break;
2184 BODY2;
2185 }
2186 ---------------------------------------------------------------------
2187
2188 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2189 we generate the following code:
2190
2191 ---------------------------------------------------------------------
2192
2193 if (MAY_BE_ZERO
a509ebb5
RL
2194 || NITER < MIN_PER_THREAD * N_THREADS)
2195 goto original;
5f40b3cb
ZD
2196
2197 BODY1;
2198 store all local loop-invariant variables used in body of the loop to DATA.
726a989a 2199 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
5f40b3cb 2200 load the variables from DATA.
726a989a 2201 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
5f40b3cb
ZD
2202 BODY2;
2203 BODY1;
726a989a
RB
2204 GIMPLE_OMP_CONTINUE;
2205 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2206 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
5f40b3cb
ZD
2207 goto end;
2208
2209 original:
2210 loop
2211 {
2212 IV = phi (INIT, IV + STEP)
2213 BODY1;
2214 if (COND)
2215 break;
2216 BODY2;
2217 }
2218
2219 end:
2220
2221 */
2222
2223 /* Create two versions of the loop -- in the old one, we know that the
2224 number of iterations is large enough, and we will transform it into the
2225 loop that will be split to loop_fn, the new one will be used for the
2226 remaining iterations. */
a509ebb5 2227
768da0da
RL
2228 /* We should compute a better number-of-iterations value for outer loops.
2229 That is, if we have
2230
2231 for (i = 0; i < n; ++i)
2232 for (j = 0; j < m; ++j)
2233 ...
2234
2235 we should compute nit = n * m, not nit = n.
2236 Also may_be_zero handling would need to be adjusted. */
2237
5f40b3cb
ZD
2238 type = TREE_TYPE (niter->niter);
2239 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2240 NULL_TREE);
2241 if (stmts)
726a989a 2242 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb 2243
768da0da
RL
2244 if (loop->inner)
2245 m_p_thread=2;
2246 else
2247 m_p_thread=MIN_PER_THREAD;
2248
2249 many_iterations_cond =
2250 fold_build2 (GE_EXPR, boolean_type_node,
2251 nit, build_int_cst (type, m_p_thread * n_threads));
2252
5f40b3cb 2253 many_iterations_cond
a509ebb5
RL
2254 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2255 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2256 many_iterations_cond);
5f40b3cb 2257 many_iterations_cond
a509ebb5 2258 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
5f40b3cb 2259 if (stmts)
726a989a 2260 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
2261 if (!is_gimple_condexpr (many_iterations_cond))
2262 {
2263 many_iterations_cond
a509ebb5
RL
2264 = force_gimple_operand (many_iterations_cond, &stmts,
2265 true, NULL_TREE);
5f40b3cb 2266 if (stmts)
726a989a 2267 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
2268 }
2269
2270 initialize_original_copy_tables ();
2271
2272 /* We assume that the loop usually iterates a lot. */
2273 prob = 4 * REG_BR_PROB_BASE / 5;
0f900dfa
JJ
2274 loop_version (loop, many_iterations_cond, NULL,
2275 prob, prob, REG_BR_PROB_BASE - prob, true);
5f40b3cb
ZD
2276 update_ssa (TODO_update_ssa);
2277 free_original_copy_tables ();
2278
2279 /* Base all the induction variables in LOOP on a single control one. */
c80a5403 2280 canonicalize_loop_ivs (loop, &nit, true);
5f40b3cb 2281
7c82d827
TV
2282 /* Ensure that the exit condition is the first statement in the loop.
2283 The common case is that latch of the loop is empty (apart from the
2284 increment) and immediately follows the loop exit test. Attempt to move the
2285 entry of the loop directly before the exit check and increase the number of
2286 iterations of the loop by one. */
a5a57bf3
TV
2287 if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2288 {
2289 if (dump_file
2290 && (dump_flags & TDF_DETAILS))
2291 fprintf (dump_file,
2292 "alternative exit-first loop transform succeeded"
2293 " for loop %d\n", loop->num);
2294 }
2295 else
7c82d827
TV
2296 {
2297 /* Fall back on the method that handles more cases, but duplicates the
2298 loop body: move the exit condition of LOOP to the beginning of its
2299 header, and duplicate the part of the last iteration that gets disabled
2300 to the exit of the loop. */
2301 transform_to_exit_first_loop (loop, reduction_list, nit);
2302 }
a509ebb5 2303
fa10beec 2304 /* Generate initializations for reductions. */
c203e8a7
TS
2305 if (reduction_list->elements () > 0)
2306 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
5f40b3cb
ZD
2307
2308 /* Eliminate the references to local variables from the loop. */
9f9f72aa
AP
2309 gcc_assert (single_exit (loop));
2310 entry = loop_preheader_edge (loop);
2311 exit = single_dom_exit (loop);
5f40b3cb 2312
9f9f72aa 2313 eliminate_local_variables (entry, exit);
5f40b3cb
ZD
2314 /* In the old loop, move all variables non-local to the loop to a structure
2315 and back, and create separate decls for the variables used in loop. */
b8698a0f 2316 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
9f9f72aa 2317 &new_arg_struct, &clsn_data);
5f40b3cb
ZD
2318
2319 /* Create the parallel constructs. */
9ff70652
JJ
2320 loc = UNKNOWN_LOCATION;
2321 cond_stmt = last_stmt (loop->header);
2322 if (cond_stmt)
2323 loc = gimple_location (cond_stmt);
18751894
TV
2324 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2325 new_arg_struct, n_threads, loc);
c203e8a7 2326 if (reduction_list->elements () > 0)
a509ebb5 2327 create_call_for_reduction (loop, reduction_list, &clsn_data);
5f40b3cb
ZD
2328
2329 scev_reset ();
2330
92a6bdbd
SP
2331 /* Free loop bound estimations that could contain references to
2332 removed statements. */
f0bd40b1 2333 FOR_EACH_LOOP (loop, 0)
92a6bdbd 2334 free_numbers_of_iterations_estimates_loop (loop);
5f40b3cb
ZD
2335}
2336
9857228c
SP
2337/* Returns true when LOOP contains vector phi nodes. */
2338
2339static bool
726a989a 2340loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
2341{
2342 unsigned i;
2343 basic_block *bbs = get_loop_body_in_dom_order (loop);
538dd0b7 2344 gphi_iterator gsi;
9857228c 2345 bool res = true;
9857228c
SP
2346
2347 for (i = 0; i < loop->num_nodes; i++)
726a989a 2348 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
538dd0b7 2349 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
9857228c
SP
2350 goto end;
2351
2352 res = false;
2353 end:
2354 free (bbs);
2355 return res;
2356}
2357
08dab97a
RL
2358/* Create a reduction_info struct, initialize it with REDUC_STMT
2359 and PHI, insert it to the REDUCTION_LIST. */
2360
2361static void
c203e8a7 2362build_new_reduction (reduction_info_table_type *reduction_list,
355fe088 2363 gimple *reduc_stmt, gphi *phi)
08dab97a 2364{
4a8fb1a1 2365 reduction_info **slot;
08dab97a 2366 struct reduction_info *new_reduction;
12efb1d7 2367 enum tree_code reduction_code;
08dab97a
RL
2368
2369 gcc_assert (reduc_stmt);
b8698a0f 2370
08dab97a
RL
2371 if (dump_file && (dump_flags & TDF_DETAILS))
2372 {
2373 fprintf (dump_file,
2374 "Detected reduction. reduction stmt is: \n");
2375 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2376 fprintf (dump_file, "\n");
2377 }
b8698a0f 2378
12efb1d7
TV
2379 if (gimple_code (reduc_stmt) == GIMPLE_PHI)
2380 {
2381 tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
355fe088 2382 gimple *def1 = SSA_NAME_DEF_STMT (op1);
12efb1d7
TV
2383 reduction_code = gimple_assign_rhs_code (def1);
2384 }
2385
2386 else
2387 reduction_code = gimple_assign_rhs_code (reduc_stmt);
2388
08dab97a 2389 new_reduction = XCNEW (struct reduction_info);
b8698a0f 2390
08dab97a
RL
2391 new_reduction->reduc_stmt = reduc_stmt;
2392 new_reduction->reduc_phi = phi;
5d1fd1de 2393 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
12efb1d7 2394 new_reduction->reduction_code = reduction_code;
c203e8a7 2395 slot = reduction_list->find_slot (new_reduction, INSERT);
08dab97a
RL
2396 *slot = new_reduction;
2397}
2398
5d1fd1de
JJ
2399/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2400
4a8fb1a1
LC
2401int
2402set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 2403{
4a8fb1a1 2404 struct reduction_info *const red = *slot;
5d1fd1de
JJ
2405 gimple_set_uid (red->reduc_phi, red->reduc_version);
2406 return 1;
2407}
2408
08dab97a
RL
2409/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2410
2411static void
c203e8a7 2412gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
08dab97a 2413{
538dd0b7 2414 gphi_iterator gsi;
08dab97a 2415 loop_vec_info simple_loop_info;
12efb1d7
TV
2416 loop_vec_info simple_inner_loop_info = NULL;
2417 bool allow_double_reduc = true;
08dab97a 2418
08dab97a 2419 simple_loop_info = vect_analyze_loop_form (loop);
1e6a7b01
TV
2420 if (simple_loop_info == NULL)
2421 return;
08dab97a
RL
2422
2423 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2424 {
538dd0b7 2425 gphi *phi = gsi.phi ();
08dab97a
RL
2426 affine_iv iv;
2427 tree res = PHI_RESULT (phi);
2428 bool double_reduc;
2429
ea057359 2430 if (virtual_operand_p (res))
08dab97a
RL
2431 continue;
2432
1e6a7b01
TV
2433 if (simple_iv (loop, loop, res, &iv, true))
2434 continue;
2435
355fe088 2436 gimple *reduc_stmt
1e6a7b01
TV
2437 = vect_force_simple_reduction (simple_loop_info, phi, true,
2438 &double_reduc, true);
12efb1d7 2439 if (!reduc_stmt)
1e6a7b01
TV
2440 continue;
2441
12efb1d7
TV
2442 if (double_reduc)
2443 {
2444 if (!allow_double_reduc
2445 || loop->inner->inner != NULL)
2446 continue;
2447
2448 if (!simple_inner_loop_info)
2449 {
2450 simple_inner_loop_info = vect_analyze_loop_form (loop->inner);
2451 if (!simple_inner_loop_info)
2452 {
2453 allow_double_reduc = false;
2454 continue;
2455 }
2456 }
2457
2458 use_operand_p use_p;
355fe088 2459 gimple *inner_stmt;
12efb1d7
TV
2460 bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
2461 gcc_assert (single_use_p);
2462 gphi *inner_phi = as_a <gphi *> (inner_stmt);
2463 if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
2464 &iv, true))
2465 continue;
2466
355fe088 2467 gimple *inner_reduc_stmt
12efb1d7
TV
2468 = vect_force_simple_reduction (simple_inner_loop_info, inner_phi,
2469 true, &double_reduc, true);
2470 gcc_assert (!double_reduc);
2471 if (inner_reduc_stmt == NULL)
2472 continue;
2473 }
2474
1e6a7b01 2475 build_new_reduction (reduction_list, reduc_stmt, phi);
08dab97a 2476 }
5d1fd1de 2477 destroy_loop_vec_info (simple_loop_info, true);
12efb1d7 2478 destroy_loop_vec_info (simple_inner_loop_info, true);
5d1fd1de
JJ
2479
2480 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2481 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2482 only now. */
c203e8a7 2483 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
08dab97a
RL
2484}
2485
2486/* Try to initialize NITER for code generation part. */
2487
2488static bool
2489try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2490{
2491 edge exit = single_dom_exit (loop);
2492
2493 gcc_assert (exit);
2494
2495 /* We need to know # of iterations, and there should be no uses of values
2496 defined inside loop outside of it, unless the values are invariants of
2497 the loop. */
2498 if (!number_of_iterations_exit (loop, exit, niter, false))
2499 {
2500 if (dump_file && (dump_flags & TDF_DETAILS))
2501 fprintf (dump_file, " FAILED: number of iterations not known\n");
2502 return false;
2503 }
2504
2505 return true;
2506}
2507
2508/* Try to initialize REDUCTION_LIST for code generation part.
2509 REDUCTION_LIST describes the reductions. */
2510
2511static bool
4a8fb1a1 2512try_create_reduction_list (loop_p loop,
c203e8a7 2513 reduction_info_table_type *reduction_list)
08dab97a
RL
2514{
2515 edge exit = single_dom_exit (loop);
538dd0b7 2516 gphi_iterator gsi;
08dab97a
RL
2517
2518 gcc_assert (exit);
2519
2520 gather_scalar_reductions (loop, reduction_list);
2521
b8698a0f 2522
08dab97a
RL
2523 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2524 {
538dd0b7 2525 gphi *phi = gsi.phi ();
08dab97a
RL
2526 struct reduction_info *red;
2527 imm_use_iterator imm_iter;
2528 use_operand_p use_p;
355fe088 2529 gimple *reduc_phi;
08dab97a
RL
2530 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2531
ea057359 2532 if (!virtual_operand_p (val))
08dab97a
RL
2533 {
2534 if (dump_file && (dump_flags & TDF_DETAILS))
2535 {
2536 fprintf (dump_file, "phi is ");
2537 print_gimple_stmt (dump_file, phi, 0, 0);
2538 fprintf (dump_file, "arg of phi to exit: value ");
2539 print_generic_expr (dump_file, val, 0);
2540 fprintf (dump_file, " used outside loop\n");
2541 fprintf (dump_file,
2542 " checking if it a part of reduction pattern: \n");
2543 }
c203e8a7 2544 if (reduction_list->elements () == 0)
08dab97a
RL
2545 {
2546 if (dump_file && (dump_flags & TDF_DETAILS))
2547 fprintf (dump_file,
2548 " FAILED: it is not a part of reduction.\n");
2549 return false;
2550 }
2551 reduc_phi = NULL;
2552 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2553 {
4942af9b
JJ
2554 if (!gimple_debug_bind_p (USE_STMT (use_p))
2555 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
08dab97a
RL
2556 {
2557 reduc_phi = USE_STMT (use_p);
2558 break;
2559 }
2560 }
2561 red = reduction_phi (reduction_list, reduc_phi);
2562 if (red == NULL)
2563 {
2564 if (dump_file && (dump_flags & TDF_DETAILS))
2565 fprintf (dump_file,
2566 " FAILED: it is not a part of reduction.\n");
2567 return false;
2568 }
2569 if (dump_file && (dump_flags & TDF_DETAILS))
2570 {
2571 fprintf (dump_file, "reduction phi is ");
2572 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2573 fprintf (dump_file, "reduction stmt is ");
2574 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2575 }
2576 }
2577 }
2578
2579 /* The iterations of the loop may communicate only through bivs whose
2580 iteration space can be distributed efficiently. */
2581 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2582 {
538dd0b7 2583 gphi *phi = gsi.phi ();
08dab97a
RL
2584 tree def = PHI_RESULT (phi);
2585 affine_iv iv;
2586
ea057359 2587 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
08dab97a
RL
2588 {
2589 struct reduction_info *red;
2590
2591 red = reduction_phi (reduction_list, phi);
2592 if (red == NULL)
2593 {
2594 if (dump_file && (dump_flags & TDF_DETAILS))
2595 fprintf (dump_file,
2596 " FAILED: scalar dependency between iterations\n");
2597 return false;
2598 }
2599 }
2600 }
2601
2602
2603 return true;
2604}
2605
5f40b3cb
ZD
2606/* Detect parallel loops and generate parallel code using libgomp
2607 primitives. Returns true if some loop was parallelized, false
2608 otherwise. */
2609
09489eb8 2610static bool
5f40b3cb
ZD
2611parallelize_loops (void)
2612{
2613 unsigned n_threads = flag_tree_parallelize_loops;
2614 bool changed = false;
2615 struct loop *loop;
e67d7a1e 2616 struct loop *skip_loop = NULL;
5f40b3cb 2617 struct tree_niter_desc niter_desc;
f873b205 2618 struct obstack parloop_obstack;
8adfe01d 2619 HOST_WIDE_INT estimated;
b05e0233 2620 source_location loop_loc;
f873b205 2621
5f40b3cb
ZD
2622 /* Do not parallelize loops in the functions created by parallelization. */
2623 if (parallelized_function_p (cfun->decl))
2624 return false;
8adfe01d
RL
2625 if (cfun->has_nonlocal_label)
2626 return false;
5f40b3cb 2627
f873b205 2628 gcc_obstack_init (&parloop_obstack);
c203e8a7 2629 reduction_info_table_type reduction_list (10);
726a989a 2630 init_stmt_vec_info_vec ();
a509ebb5 2631
f0bd40b1 2632 FOR_EACH_LOOP (loop, 0)
5f40b3cb 2633 {
e67d7a1e
TV
2634 if (loop == skip_loop)
2635 {
2636 if (dump_file && (dump_flags & TDF_DETAILS))
2637 fprintf (dump_file,
2638 "Skipping loop %d as inner loop of parallelized loop\n",
2639 loop->num);
2640
2641 skip_loop = loop->inner;
2642 continue;
2643 }
2644 else
2645 skip_loop = NULL;
2646
4a8fb1a1 2647 reduction_list.empty ();
48710229
RL
2648 if (dump_file && (dump_flags & TDF_DETAILS))
2649 {
2650 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2651 if (loop->inner)
2652 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2653 else
2654 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2655 }
b8698a0f 2656
48710229 2657 /* If we use autopar in graphite pass, we use its marked dependency
87d4d0ee
SP
2658 checking results. */
2659 if (flag_loop_parallelize_all && !loop->can_be_parallel)
48710229
RL
2660 {
2661 if (dump_file && (dump_flags & TDF_DETAILS))
2662 fprintf (dump_file, "loop is not parallel according to graphite\n");
87d4d0ee 2663 continue;
48710229 2664 }
87d4d0ee 2665
48710229
RL
2666 if (!single_dom_exit (loop))
2667 {
b8698a0f 2668
48710229
RL
2669 if (dump_file && (dump_flags & TDF_DETAILS))
2670 fprintf (dump_file, "loop is !single_dom_exit\n");
b8698a0f 2671
08dab97a 2672 continue;
48710229 2673 }
08dab97a
RL
2674
2675 if (/* And of course, the loop must be parallelizable. */
2676 !can_duplicate_loop_p (loop)
1d4af1e8 2677 || loop_has_blocks_with_irreducible_flag (loop)
8adfe01d 2678 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
9857228c 2679 /* FIXME: the check for vector phi nodes could be removed. */
69958396 2680 || loop_has_vector_phi_nodes (loop))
08dab97a 2681 continue;
e5b332cd 2682
652c4c71 2683 estimated = estimated_stmt_executions_int (loop);
e5b332cd
RG
2684 if (estimated == -1)
2685 estimated = max_stmt_executions_int (loop);
87d4d0ee 2686 /* FIXME: Bypass this check as graphite doesn't update the
e5b332cd 2687 count and frequency correctly now. */
87d4d0ee 2688 if (!flag_loop_parallelize_all
e5b332cd
RG
2689 && ((estimated != -1
2690 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
87d4d0ee
SP
2691 /* Do not bother with loops in cold areas. */
2692 || optimize_loop_nest_for_size_p (loop)))
08dab97a 2693 continue;
b8698a0f 2694
08dab97a
RL
2695 if (!try_get_loop_niter (loop, &niter_desc))
2696 continue;
2697
c203e8a7 2698 if (!try_create_reduction_list (loop, &reduction_list))
08dab97a
RL
2699 continue;
2700
f873b205
LB
2701 if (!flag_loop_parallelize_all
2702 && !loop_parallel_p (loop, &parloop_obstack))
5f40b3cb
ZD
2703 continue;
2704
2705 changed = true;
e67d7a1e 2706 skip_loop = loop->inner;
48710229
RL
2707 if (dump_file && (dump_flags & TDF_DETAILS))
2708 {
48710229 2709 if (loop->inner)
8adfe01d 2710 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
48710229 2711 else
8adfe01d
RL
2712 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2713 loop_loc = find_loop_location (loop);
b05e0233 2714 if (loop_loc != UNKNOWN_LOCATION)
8adfe01d 2715 fprintf (dump_file, "\nloop at %s:%d: ",
b05e0233 2716 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
b8698a0f 2717 }
c203e8a7 2718 gen_parallel_loop (loop, &reduction_list,
08dab97a 2719 n_threads, &niter_desc);
5f40b3cb
ZD
2720 }
2721
726a989a 2722 free_stmt_vec_info_vec ();
f873b205 2723 obstack_free (&parloop_obstack, NULL);
6b8ed145
RG
2724
2725 /* Parallelization will cause new function calls to be inserted through
d086d311
RG
2726 which local variables will escape. Reset the points-to solution
2727 for ESCAPED. */
6b8ed145 2728 if (changed)
d086d311 2729 pt_solution_reset (&cfun->gimple_df->escaped);
6b8ed145 2730
5f40b3cb
ZD
2731 return changed;
2732}
2733
c1bf2a39
AM
2734/* Parallelization. */
2735
c1bf2a39
AM
2736namespace {
2737
2738const pass_data pass_data_parallelize_loops =
2739{
2740 GIMPLE_PASS, /* type */
2741 "parloops", /* name */
2742 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
2743 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2744 ( PROP_cfg | PROP_ssa ), /* properties_required */
2745 0, /* properties_provided */
2746 0, /* properties_destroyed */
2747 0, /* todo_flags_start */
3bea341f 2748 0, /* todo_flags_finish */
c1bf2a39
AM
2749};
2750
2751class pass_parallelize_loops : public gimple_opt_pass
2752{
2753public:
2754 pass_parallelize_loops (gcc::context *ctxt)
2755 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2756 {}
2757
2758 /* opt_pass methods: */
1a3d085c 2759 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
be55bfe6 2760 virtual unsigned int execute (function *);
c1bf2a39
AM
2761
2762}; // class pass_parallelize_loops
2763
be55bfe6
TS
2764unsigned
2765pass_parallelize_loops::execute (function *fun)
2766{
2767 if (number_of_loops (fun) <= 1)
2768 return 0;
2769
2770 if (parallelize_loops ())
18751894
TV
2771 {
2772 fun->curr_properties &= ~(PROP_gimple_eomp);
e67d7a1e
TV
2773
2774#ifdef ENABLE_CHECKING
2775 verify_loop_structure ();
2776#endif
2777
18751894
TV
2778 return TODO_update_ssa;
2779 }
2780
be55bfe6
TS
2781 return 0;
2782}
2783
c1bf2a39
AM
2784} // anon namespace
2785
2786gimple_opt_pass *
2787make_pass_parallelize_loops (gcc::context *ctxt)
2788{
2789 return new pass_parallelize_loops (ctxt);
2790}