]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
re PR c++/67845 (ICE on invalid use of const qualifier on x86_64-linux-gnu in merge_t...
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
40e23961 25#include "alias.h"
c7131fb2 26#include "backend.h"
9fdcd34e 27#include "cfghooks.h"
40e23961 28#include "tree.h"
c7131fb2 29#include "gimple.h"
60393bbc 30#include "hard-reg-set.h"
c7131fb2
AM
31#include "ssa.h"
32#include "options.h"
33#include "fold-const.h"
2fb9a547 34#include "internal-fn.h"
45b0be94 35#include "gimplify.h"
5be5c238 36#include "gimple-iterator.h"
18f429e2 37#include "gimplify-me.h"
5be5c238 38#include "gimple-walk.h"
d8a2d370
DN
39#include "stor-layout.h"
40#include "tree-nested.h"
442b4905 41#include "tree-cfg.h"
e28030cf
AM
42#include "tree-ssa-loop-ivopts.h"
43#include "tree-ssa-loop-manip.h"
44#include "tree-ssa-loop-niter.h"
442b4905
AM
45#include "tree-ssa-loop.h"
46#include "tree-into-ssa.h"
5f40b3cb 47#include "cfgloop.h"
5f40b3cb 48#include "tree-data-ref.h"
1bd6497c 49#include "tree-scalar-evolution.h"
cf835838 50#include "gimple-pretty-print.h"
5f40b3cb 51#include "tree-pass.h"
5f40b3cb 52#include "langhooks.h"
a509ebb5 53#include "tree-vectorizer.h"
4a8fb1a1 54#include "tree-hasher.h"
c1bf2a39 55#include "tree-parloops.h"
0645c1a2 56#include "omp-low.h"
1fe37220 57#include "tree-nested.h"
a79b7ec5 58#include "cgraph.h"
7c82d827 59#include "tree-ssa.h"
f7f18684 60#include "params.h"
1f600fea 61#include "params-enum.h"
5f40b3cb
ZD
62
63/* This pass tries to distribute iterations of loops into several threads.
64 The implementation is straightforward -- for each loop we test whether its
65 iterations are independent, and if it is the case (and some additional
66 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
67 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
68 machinery do its job.
b8698a0f 69
5f40b3cb
ZD
   Most of the complexity is in bringing the code into shape expected
71 by the omp expanders:
726a989a
RB
72 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
73 variable and that the exit test is at the start of the loop body
74 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
75 variables by accesses through pointers, and breaking up ssa chains
76 by storing the values incoming to the parallelized loop to a structure
77 passed to the new function as an argument (something similar is done
78 in omp gimplification, unfortunately only a small part of the code
79 can be shared).
80
81 TODO:
82 -- if there are several parallelizable loops in a function, it may be
83 possible to generate the threads just once (using synchronization to
84 ensure that cross-loop dependences are obeyed).
70837b71
RL
85 -- handling of common reduction patterns for outer loops.
86
87 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 88/*
a509ebb5 89 Reduction handling:
8a9ecffd 90 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 91 The code transformation will be introduced by an example.
b8698a0f
L
92
93
a509ebb5
RL
94parloop
95{
96 int sum=1;
97
0eb7e7aa 98 for (i = 0; i < N; i++)
a509ebb5
RL
99 {
100 x[i] = i + 3;
101 sum+=x[i];
102 }
103}
104
0eb7e7aa 105gimple-like code:
a509ebb5
RL
106header_bb:
107
0eb7e7aa
RL
108 # sum_29 = PHI <sum_11(5), 1(3)>
109 # i_28 = PHI <i_12(5), 0(3)>
110 D.1795_8 = i_28 + 3;
111 x[i_28] = D.1795_8;
112 sum_11 = D.1795_8 + sum_29;
113 i_12 = i_28 + 1;
114 if (N_6(D) > i_12)
115 goto header_bb;
116
a509ebb5
RL
117
118exit_bb:
119
0eb7e7aa
RL
120 # sum_21 = PHI <sum_11(4)>
121 printf (&"%d"[0], sum_21);
a509ebb5
RL
122
123
124after reduction transformation (only relevant parts):
125
126parloop
127{
128
129....
130
0eb7e7aa 131
fa10beec 132 # Storing the initial value given by the user. #
0eb7e7aa 133
ae0bce62 134 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
135
136 #pragma omp parallel num_threads(4)
a509ebb5 137
0eb7e7aa 138 #pragma omp for schedule(static)
ae0bce62
RL
139
140 # The neutral element corresponding to the particular
141 reduction's operation, e.g. 0 for PLUS_EXPR,
142 1 for MULT_EXPR, etc. replaces the user's initial value. #
143
144 # sum.27_29 = PHI <sum.27_11, 0>
145
0eb7e7aa 146 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 147
726a989a 148 GIMPLE_OMP_CONTINUE
a509ebb5 149
0eb7e7aa
RL
150 # Adding this reduction phi is done at create_phi_for_local_result() #
151 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 152 GIMPLE_OMP_RETURN
b8698a0f
L
153
154 # Creating the atomic operation is done at
0eb7e7aa 155 create_call_for_reduction_1() #
a509ebb5 156
0eb7e7aa
RL
157 #pragma omp atomic_load
158 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
159 D.1840_60 = sum.27_56 + D.1839_59;
160 #pragma omp atomic_store (D.1840_60);
b8698a0f 161
726a989a 162 GIMPLE_OMP_RETURN
b8698a0f 163
0eb7e7aa
RL
164 # collecting the result after the join of the threads is done at
165 create_loads_for_reductions().
ae0bce62
RL
166 The value computed by the threads is loaded from the
167 shared struct. #
168
b8698a0f 169
0eb7e7aa 170 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 171 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
172 sum_43 = D.1795_41 + sum_37;
173
174 exit bb:
175 # sum_21 = PHI <sum_43, sum_26>
176 printf (&"%d"[0], sum_21);
177
178...
179
a509ebb5
RL
180}
181
182*/
183
5f40b3cb
ZD
184/* Minimal number of iterations of a loop that should be executed in each
185 thread. */
186#define MIN_PER_THREAD 100
187
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple *reduc_stmt;		/* reduction statement.  */
  gimple *reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  Used as the hash key so lookups
				   survive phi-node replacement.  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value (the neutral
				   element of the operation).  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
207
/* Reduction info hashtable helpers.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};

/* Equality and hash functions for hashtab code.  */

/* Two entries are equal iff they describe the same reduction phi.  */

inline bool
reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
{
  return (a->reduc_phi == b->reduc_phi);
}

/* Hash on the recorded SSA version of the reduction phi result.  */

inline hashval_t
reduction_hasher::hash (const reduction_info *a)
{
  return a->reduc_version;
}

typedef hash_table<reduction_hasher> reduction_info_table_type;
4a8fb1a1
LC
231
232
a509ebb5 233static struct reduction_info *
355fe088 234reduction_phi (reduction_info_table_type *reduction_list, gimple *phi)
a509ebb5
RL
235{
236 struct reduction_info tmpred, *red;
237
c203e8a7 238 if (reduction_list->elements () == 0 || phi == NULL)
a509ebb5
RL
239 return NULL;
240
fdce493d
TV
241 if (gimple_uid (phi) == (unsigned int)-1
242 || gimple_uid (phi) == 0)
243 return NULL;
244
a509ebb5 245 tmpred.reduc_phi = phi;
5d1fd1de 246 tmpred.reduc_version = gimple_uid (phi);
c203e8a7 247 red = reduction_list->find (&tmpred);
fdce493d 248 gcc_assert (red == NULL || red->reduc_phi == phi);
a509ebb5
RL
249
250 return red;
251}
252
5f40b3cb
ZD
/* Element of hashtable of names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
262
/* Name copies hashtable helpers.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};

/* Equality and hash functions for hashtab code.  */

/* Two entries are equal iff they describe the same SSA name version.  */

inline bool
name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
{
  return a->version == b->version;
}

/* Hash directly on the SSA name version.  */

inline hashval_t
name_to_copy_hasher::hash (const name_to_copy_elt *a)
{
  return (hashval_t) a->version;
}

typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
4a8fb1a1 286
b305e3da
SP
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;		/* The element numerators.  */
  int rowsize;			/* Number of rows.  */
  int colsize;			/* Number of columns.  */
  int denominator;		/* Common denominator for all elements.  */
} *lambda_trans_matrix;

/* Accessors for the fields of a lambda_trans_matrix.  */
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
301
302/* Allocate a new transformation matrix. */
303
304static lambda_trans_matrix
305lambda_trans_matrix_new (int colsize, int rowsize,
306 struct obstack * lambda_obstack)
307{
308 lambda_trans_matrix ret;
309
310 ret = (lambda_trans_matrix)
311 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
312 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
313 LTM_ROWSIZE (ret) = rowsize;
314 LTM_COLSIZE (ret) = colsize;
315 LTM_DENOMINATOR (ret) = 1;
316 return ret;
317}
318
319/* Multiply a vector VEC by a matrix MAT.
320 MAT is an M*N matrix, and VEC is a vector with length N. The result
321 is stored in DEST which must be a vector of length M. */
322
323static void
324lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
325 lambda_vector vec, lambda_vector dest)
326{
327 int i, j;
328
329 lambda_vector_clear (dest, m);
330 for (i = 0; i < m; i++)
331 for (j = 0; j < n; j++)
332 dest[i] += matrix[i][j] * vec[j];
333}
334
335/* Return true if TRANS is a legal transformation matrix that respects
336 the dependence vectors in DISTS and DIRS. The conservative answer
337 is false.
338
339 "Wolfe proves that a unimodular transformation represented by the
340 matrix T is legal when applied to a loop nest with a set of
341 lexicographically non-negative distance vectors RDG if and only if
342 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
343 i.e.: if and only if it transforms the lexicographically positive
344 distance vectors to lexicographically positive vectors. Note that
345 a unimodular matrix must transform the zero vector (and only it) to
346 the zero vector." S.Muchnick. */
347
348static bool
349lambda_transform_legal_p (lambda_trans_matrix trans,
350 int nb_loops,
9771b263 351 vec<ddr_p> dependence_relations)
b305e3da
SP
352{
353 unsigned int i, j;
354 lambda_vector distres;
355 struct data_dependence_relation *ddr;
356
357 gcc_assert (LTM_COLSIZE (trans) == nb_loops
358 && LTM_ROWSIZE (trans) == nb_loops);
359
360 /* When there are no dependences, the transformation is correct. */
9771b263 361 if (dependence_relations.length () == 0)
b305e3da
SP
362 return true;
363
9771b263 364 ddr = dependence_relations[0];
b305e3da
SP
365 if (ddr == NULL)
366 return true;
367
368 /* When there is an unknown relation in the dependence_relations, we
369 know that it is no worth looking at this loop nest: give up. */
370 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
371 return false;
372
373 distres = lambda_vector_new (nb_loops);
374
375 /* For each distance vector in the dependence graph. */
9771b263 376 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
b305e3da
SP
377 {
378 /* Don't care about relations for which we know that there is no
379 dependence, nor about read-read (aka. output-dependences):
380 these data accesses can happen in any order. */
381 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
382 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
383 continue;
384
385 /* Conservatively answer: "this transformation is not valid". */
386 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
387 return false;
388
389 /* If the dependence could not be captured by a distance vector,
390 conservatively answer that the transform is not valid. */
391 if (DDR_NUM_DIST_VECTS (ddr) == 0)
392 return false;
393
394 /* Compute trans.dist_vect */
395 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
396 {
397 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
398 DDR_DIST_VECT (ddr, j), distres);
399
400 if (!lambda_vector_lexico_pos (distres, nb_loops))
401 return false;
402 }
403 }
404 return true;
405}
08dab97a
RL
406
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  Scratch allocations are made on PARLOOP_OBSTACK.  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  /* A 1x1 matrix containing -1 represents loop reversal; if reversal is
     legal, the iterations are independent.  */
  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  /* Both vectors are freed on every exit path.  */
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
463
1d4af1e8
SP
464/* Return true when LOOP contains basic blocks marked with the
465 BB_IRREDUCIBLE_LOOP flag. */
466
467static inline bool
468loop_has_blocks_with_irreducible_flag (struct loop *loop)
469{
470 unsigned i;
471 basic_block *bbs = get_loop_body_in_dom_order (loop);
472 bool res = true;
473
474 for (i = 0; i < loop->num_nodes; i++)
475 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
476 goto end;
477
478 res = false;
479 end:
480 free (bbs);
481 return res;
482}
483
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  When GSI is NULL (debug-bind context) no statements
   may be emitted; NULL_TREE is returned if a new statement would have been
   required.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Walk down to the base object past any component references.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      /* No cached address yet; in a debug context we may not create
	 one, so signal failure to the caller.  */
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Convert to the requested pointer type if necessary.  */
  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
558
12efb1d7 559static tree
355fe088 560reduc_stmt_res (gimple *stmt)
12efb1d7
TV
561{
562 return (gimple_code (stmt) == GIMPLE_PHI
563 ? gimple_phi_result (stmt)
564 : gimple_assign_lhs (stmt));
565}
566
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  Always returns 1 so traversal
   continues.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init;
  tree type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  init = omp_reduction_init_op (gimple_location (reduc->reduc_stmt),
				reduc->reduction_code, type);
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  /* Read the original initial value BEFORE overwriting the phi
     argument below.  */
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  reduc->initial_value = arg;
  return 1;
}
5f40b3cb
ZD
609
/* Data passed through walk_tree/walk_gimple_op while eliminating
   references to local variables (see eliminate_local_variables_1).
   INFO must be the first member: the callback receives a pointer to
   it and casts back to the enclosing struct.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Walker bookkeeping; must stay first.  */
  edge entry;			/* Entry edge of the SESE region.  */
  int_tree_htab_type *decl_address;  /* Cache of already-taken addresses.  */
  gimple_stmt_iterator *gsi;	/* Insertion point for new statements;
				   NULL when handling a debug bind.  */
  bool changed;			/* Set when the statement was modified.  */
  bool reset;			/* Set when a debug bind value must be reset.  */
};
619
9f9f72aa
AP
/* Eliminates references to local variables in *TP out of the single
   entry single exit region starting at DTA->ENTRY.
   DECL_ADDRESS contains addresses of the references that had their
   address taken already.  If the expression is changed, CHANGED is
   set to true.  Callback for walk_tree.  */

static tree
eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
{
  struct elv_data *const dta = (struct elv_data *) data;
  tree t = *tp, var, addr, addr_type, type, obj;

  if (DECL_P (t))
    {
      *walk_subtrees = 0;

      if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
	return NULL_TREE;

      /* Rewrite the direct decl reference into *&decl via a cached
	 address SSA name.  */
      type = TREE_TYPE (t);
      addr_type = build_pointer_type (type);
      addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  /* Debug context and no cached address: ask the caller to
	     reset the debug bind instead.  */
	  dta->reset = true;
	  return NULL_TREE;
	}

      *tp = build_simple_mem_ref (addr);

      dta->changed = true;
      return NULL_TREE;
    }

  if (TREE_CODE (t) == ADDR_EXPR)
    {
      /* ADDR_EXPR may appear in two contexts:
	 -- as a gimple operand, when the address taken is a function invariant
	 -- as gimple rhs, when the resulting address in not a function
	    invariant
	 We do not need to do anything special in the latter case (the base of
	 the memory reference whose address is taken may be replaced in the
	 DECL_P case).  The former case is more complicated, as we need to
	 ensure that the new address is still a gimple operand.  Thus, it
	 is not sufficient to replace just the base of the memory reference --
	 we need to move the whole computation of the address out of the
	 loop.  */
      if (!is_gimple_val (t))
	return NULL_TREE;

      *walk_subtrees = 0;
      obj = TREE_OPERAND (t, 0);
      var = get_base_address (obj);
      if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
	return NULL_TREE;

      addr_type = TREE_TYPE (t);
      addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
			      dta->gsi);
      if (dta->gsi == NULL && addr == NULL_TREE)
	{
	  dta->reset = true;
	  return NULL_TREE;
	}
      *tp = addr;

      dta->changed = true;
      return NULL_TREE;
    }

  /* Only recurse into expressions; leaves were handled above.  */
  if (!EXPR_P (t))
    *walk_subtrees = 0;

  return NULL_TREE;
}
696
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple *stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      /* Debug binds may not have new statements inserted for them:
	 walk with GSI == NULL and reset the bind value if a rewrite
	 turned out to be impossible.  */
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      /* Clobbers of local variables are meaningless in the outlined
	 region; replace them with a nop.  */
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
741
9f9f72aa
AP
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rewrite non-debug statements, noting whether any debug
     binds exist; debug binds are handled in a second pass so that all
     cached addresses are available by then.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, &decl_address);

  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
785
786/* Returns true if expression EXPR is not defined between ENTRY and
787 EXIT, i.e. if all its operands are defined outside of the region. */
788
789static bool
790expr_invariant_in_region_p (edge entry, edge exit, tree expr)
791{
792 basic_block entry_bb = entry->src;
793 basic_block exit_bb = exit->dest;
794 basic_block def_bb;
9f9f72aa
AP
795
796 if (is_gimple_min_invariant (expr))
797 return true;
798
799 if (TREE_CODE (expr) == SSA_NAME)
800 {
726a989a 801 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
802 if (def_bb
803 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
804 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
805 return false;
806
807 return true;
808 }
809
726a989a 810 return false;
5f40b3cb
ZD
811}
812
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  /* Only allocate a slot when we are allowed to create a copy;
     otherwise just probe for an existing one.  */
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      gcc_assert (!slot);
      copy = name;
    }

  /* Anonymous SSA names have no base decl to duplicate.  */
  var = SSA_NAME_VAR (name);
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
886
9f9f72aa
AP
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple *stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  /* Defs are never copied (copy_name_p == false); only their base decl is
     rewritten to the duplicated variable.  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  /* Uses that are invariant in the region are replaced by a duplicated
     ssa name; the duplicate will be initialized from the structure that
     passes data into the parallelized region.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
926
b5b8b0ac
AO
/* Handles a debug bind or debug source bind STMT in the region being
   outlined: rewrites the bound variable and the ssa names it uses to the
   duplicates recorded in DECL_COPIES and NAME_COPIES.  Returns true if
   STMT cannot be kept (it is not a debug bind/source bind, binds a
   DEBUG_EXPR_DECL/LABEL_DECL, or its variable has no duplicate), in which
   case the caller removes it; returns false if STMT was rewritten.  */

static bool
separate_decls_in_region_debug (gimple *stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  /* Look up the duplicate of the bound variable; if none was created by the
     non-debug pass over the region, the bind cannot be preserved.  */
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* A used name has no duplicate: drop the bound value but keep
	     the bind itself, so the variable is still known to exist.  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
985
0eb7e7aa
RL
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{
  struct reduction_info *const red = *slot;
  tree var = reduc_stmt_res (red->reduc_stmt);
  /* The field inherits the reduction result's identifier and type, and is
     located at the reduction statement for debugging purposes.  */
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  /* Remember the field so the stores/loads for this reduction can find it.  */
  red->field = field;

  return 1;
}
a509ebb5 1004
5f40b3cb 1005/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 1006 described in SLOT. The type is passed in DATA. */
5f40b3cb 1007
4a8fb1a1
LC
1008int
1009add_field_for_name (name_to_copy_elt **slot, tree type)
5f40b3cb 1010{
4a8fb1a1 1011 struct name_to_copy_elt *const elt = *slot;
5f40b3cb 1012 tree name = ssa_name (elt->version);
70b5e7dc
RG
1013 tree field = build_decl (UNKNOWN_LOCATION,
1014 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1015 TREE_TYPE (name));
5f40b3cb
ZD
1016
1017 insert_field_into_struct (type, field);
1018 elt->field = field;
a509ebb5 1019
5f40b3cb
ZD
1020 return 1;
1021}
1022
b8698a0f
L
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb, continue_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  continue_bb = single_pred (loop->latch);
  store_bb = FALLTHRU_EDGE (continue_bb)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (continue_bb))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  tree lhs = reduc_stmt_res (reduc->reduc_stmt);
  local_res = copy_ssa_name (lhs);
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  /* E brings in the initial value; the fallthru edge brings in the value
     computed by the loop body.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, lhs, FALLTHRU_EDGE (continue_bb), locus);
  reduc->new_phi = new_phi;

  return 1;
}
5f40b3cb
ZD
1065
/* Describes the shared data structure used to pass values between the
   threads: STORE is the structure variable filled in before the region
   (stores emitted in STORE_BB), and LOAD is the variable through which the
   data is read back (loads emitted in LOAD_BB).  */

struct clsn_data
{
  tree store;
  tree load;

  basic_block store_bb;
  basic_block load_bb;
};
1074
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple *load;

  /* Address of this reduction's field in the shared structure; this is
     the memory location the atomic update operates on.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  /* Atomically load the current value of the field...  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* ... combine it with this thread's local result ...  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* ... and atomically store the combined value back.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
1127
b8698a0f
L
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First give every reduction a phi merging the loop result with the
     initial value, then emit the atomic update for each of them.  */
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  basic_block continue_bb = single_pred (loop->latch);
  ld_st_data->load_bb = FALLTHRU_EDGE (continue_bb)->dest;
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
1144
ae0bce62
RL
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  /* Assign the field's final value directly to the result of the old
     exit phi ...  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* ... and remove that phi, which the assignment now replaces.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
1183
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple *stmt;

  /* Initialize the load pointer with the address of the shared structure,
     then emit one load per reduction.  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
}
1205
0eb7e7aa
RL
1206/* Callback for htab_traverse. Store the neutral value for the
1207 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1208 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1209 The reduction is specified in SLOT. The store information is
1210 passed in DATA. */
0eb7e7aa 1211
4a8fb1a1
LC
1212int
1213create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
0eb7e7aa 1214{
4a8fb1a1 1215 struct reduction_info *const red = *slot;
726a989a 1216 tree t;
355fe088 1217 gimple *stmt;
726a989a 1218 gimple_stmt_iterator gsi;
12efb1d7 1219 tree type = TREE_TYPE (reduc_stmt_res (red->reduc_stmt));
726a989a
RB
1220
1221 gsi = gsi_last_bb (clsn_data->store_bb);
1222 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1223 stmt = gimple_build_assign (t, red->initial_value);
726a989a 1224 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1225
1226 return 1;
1227}
1228
a509ebb5
RL
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple *stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  /* Before the region: store the original ssa name into its field.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Inside the region: load the duplicate back from the structure.  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
1257
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rewrite all non-debug phis and statements, collecting the
     name and decl duplicates as we go.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple *stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
1417
a79b7ec5 1418/* Returns true if FN was created to run in parallel. */
5f40b3cb 1419
62e0a1ed 1420bool
a79b7ec5 1421parallelized_function_p (tree fndecl)
5f40b3cb 1422{
a79b7ec5
TV
1423 cgraph_node *node = cgraph_node::get (fndecl);
1424 gcc_assert (node != NULL);
1425 return node->parallelized_function;
5f40b3cb
ZD
1426}
1427
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  /* Counter making each generated function name unique.  */
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  /* Derive a private name "<current function>.$loopfn.<N>".  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* Signature: void fn (void *), the pointer carrying the shared data.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Void result.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* Single parameter: the pointer to the shared ".paral_data" struct.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1479
7c82d827
TV
1480/* Replace uses of NAME by VAL in block BB. */
1481
1482static void
1483replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1484{
355fe088 1485 gimple *use_stmt;
7c82d827
TV
1486 imm_use_iterator imm_iter;
1487
1488 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1489 {
1490 if (gimple_bb (use_stmt) != bb)
1491 continue;
1492
1493 use_operand_p use_p;
1494 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1495 SET_USE (use_p, val);
1496 }
1497}
1498
7c82d827
TV
/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

   [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
       that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>


   In unified diff format:

     <bb preheader>:
     ...
   - goto <bb header>
   + goto <bb newheader>

     <bb header>:
   - ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
   - sum_a = PHI <sum_init (preheader), sum_b (latch)>
   + ivtmp_a = PHI <ivtmp_c (latch)>
   + sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
   - if (ivtmp_a < n)
   -   goto <bb latch>;
   +   goto <bb latch>;
   +
   + <bb newheader>:
   + ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
   + sum_c = PHI <sum_init (preheader), sum_b (latch)>
   + if (ivtmp_c < n + 1)
   +   goto <bb header>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
   - goto <bb header>
   + goto <bb newheader>

   + <bb newexit>:
   + sum_y = PHI <sum_c (newheader)>

     <bb exit>:
   - sum_z = PHI <sum_b (cond[1]), ...>
   + sum_z = PHI <sum_y (newexit), ...>

   Note: the example does not show any virtual phis, but these are handled more
   or less as reductions.


   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */
1611
static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal from after the transformation, which is required by
     create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      /* Every header phi is either the control iv, a virtual, or a known
	 reduction.  */
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  /* Create a new empty exit block, inbetween the new loop header and the old
     exit block.  The function separate_decls_in_region needs this block to
     insert code that is active on loop exit, but not any other path.  */
  basic_block new_exit_block = split_edge (exit);

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree res_z = PHI_RESULT (phi);

      /* Now that we have a new exit block, duplicate the phi of the old exit
	 block in the new exit block to preserve loop-closed ssa.  */
      edge succ_new_exit_block = single_succ_edge (new_exit_block);
      edge pred_new_exit_block = single_pred_edge (new_exit_block);
      tree res_y = copy_ssa_name (res_z, phi);
      gphi *nphi = create_phi_node (res_y, new_exit_block);
      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
      add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
      add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);

      if (virtual_operand_p (res_z))
	continue;

      gimple *reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = nphi;
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}
1747
/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of the
   loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
   transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
	{
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
  if (!wi::lts_p (nit_max, type_max))
    return false;

  gimple *def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
1863
/* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
   number of iterations of the loop.  REDUCTION_LIST describes the reductions in
   LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  /* Separate the loop header's labels from its code; ORIG_HEADER is the
     block that now holds the original header statements.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The exit condition compares the control iv against NIT.  */
  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh result and let the original result be
	 defined by a new phi in ORIG_HEADER fed from the header.  */
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition using the header phi's new result.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* Find the position of the exit block in the dominator-ordered body.  */
  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  /* Duplicate the part of the last iteration that gets disabled to the
     exit of the loop.  */
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else copied out must be the control variable.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
1975
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb, continue_bb;
  tree t, param;
  gomp_parallel *omp_par_stmt;
  gimple *omp_return_stmt1, *omp_return_stmt2;
  gimple *phi;
  gcond *cond_stmt;
  gomp_for *for_stmt;
  gomp_continue *omp_cont_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (omp_par_stmt, loc);

  gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA by taking the address of DATA and converting it to
     the type expected by the outlined function's argument.  */
  if (data)
    {
      gassign *assign_stmt;

      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
      assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

      assign_stmt = gimple_build_assign (new_data,
					 fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  omp_return_stmt1 = gimple_build_omp_return (false);
  gimple_set_location (omp_return_stmt1, loc);
  gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = as_a <gcond *> (last_stmt (loop->header));

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  /* Replace the preheader argument of the control iv's phi by a fresh name;
     the GIMPLE_OMP_FOR index will define it.  */
  initvar = copy_ssa_name (cvar);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* Remove the iv increment from the latch; GIMPLE_OMP_CONTINUE will
     define CVAR_NEXT instead.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  guard = make_edge (for_bb, ex_bb, 0);
  /* Split the latch edge, so LOOPS_HAVE_SIMPLE_LATCHES is still valid.  */
  loop->latch = split_edge (single_succ_edge (loop->latch));
  single_pred_edge (loop->latch)->flags = 0;
  end = make_edge (single_pred (loop->latch), ex_bb, EDGE_FALLTHRU);
  rescan_loop_exit (end, true, false);

  /* Complete the exit-block phis with arguments for the new GUARD and END
     edges, mirroring the corresponding header phi arguments.  */
  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      source_location locus;
      gphi *phi = gpi.phi ();
      tree def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      gimple *def_stmt = SSA_NAME_DEF_STMT (def);

      /* If the exit phi is not connected to a header phi in the same loop, this
	 value is not modified in the loop, and we're done with this phi.  */
      if (!(gimple_code (def_stmt) == GIMPLE_PHI
	    && gimple_bb (def_stmt) == loop->header))
	continue;

      gphi *stmt = as_a <gphi *> (def_stmt);
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  int chunk_size = PARAM_VALUE (PARAM_PARLOOPS_CHUNK_SIZE);
  /* Map the --param parloops-schedule value onto the OMP schedule kind.
     For auto and runtime a chunk size may not be specified.  */
  enum PARAM_PARLOOPS_SCHEDULE_KIND schedule_type \
    = (enum PARAM_PARLOOPS_SCHEDULE_KIND) PARAM_VALUE (PARAM_PARLOOPS_SCHEDULE);
  switch (schedule_type)
    {
    case PARAM_PARLOOPS_SCHEDULE_KIND_static:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_dynamic:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_DYNAMIC;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_guided:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_GUIDED;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_auto:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_AUTO;
      chunk_size = 0;
      break;
    case PARAM_PARLOOPS_SCHEDULE_KIND_runtime:
      OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_RUNTIME;
      chunk_size = 0;
      break;
    default:
      gcc_unreachable ();
    }
  if (chunk_size != 0)
    OMP_CLAUSE_SCHEDULE_CHUNK_EXPR (t)
      = build_int_cst (integer_type_node, chunk_size);

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  continue_bb = single_pred (loop->latch);
  gsi = gsi_last_bb (continue_bb);
  omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (omp_cont_stmt, loc);
  gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  omp_return_stmt2 = gimple_build_omp_return (true);
  gimple_set_location (omp_return_stmt2, loc);
  gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
2158
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple *cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
     || NITER < MIN_PER_THREAD * N_THREADS)
     goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* For loop nests only require 2 iterations per thread; the per-iteration
     work is larger (see the comment above about outer-loop nit).  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.
     The common case is that latch of the loop is empty (apart from the
     increment) and immediately follows the loop exit test.  Attempt to move the
     entry of the loop directly before the exit check and increase the number of
     iterations of the loop by one.  */
  if (try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
    {
      if (dump_file
	  && (dump_flags & TDF_DETAILS))
	fprintf (dump_file,
		 "alternative exit-first loop transform succeeded"
		 " for loop %d\n", loop->num);
    }
  else
    {
      /* Fall back on the method that handles more cases, but duplicates the
	 loop body: move the exit condition of LOOP to the beginning of its
	 header, and duplicate the part of the last iteration that gets disabled
	 to the exit of the loop.  */
      transform_to_exit_first_loop (loop, reduction_list, nit);
    }

  /* Generate initializations for reductions.  */
  if (reduction_list->elements () > 0)
    reduction_list->traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
			new_arg_struct, n_threads, loc);
  if (reduction_list->elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);
}
2341
9857228c
SP
2342/* Returns true when LOOP contains vector phi nodes. */
2343
2344static bool
726a989a 2345loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
2346{
2347 unsigned i;
2348 basic_block *bbs = get_loop_body_in_dom_order (loop);
538dd0b7 2349 gphi_iterator gsi;
9857228c 2350 bool res = true;
9857228c
SP
2351
2352 for (i = 0; i < loop->num_nodes; i++)
726a989a 2353 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
538dd0b7 2354 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
9857228c
SP
2355 goto end;
2356
2357 res = false;
2358 end:
2359 free (bbs);
2360 return res;
2361}
2362
/* Create a reduction_info struct, initialize it with REDUC_STMT
   and PHI, insert it to the REDUCTION_LIST.  */

static void
build_new_reduction (reduction_info_table_type *reduction_list,
		     gimple *reduc_stmt, gphi *phi)
{
  reduction_info **slot;
  struct reduction_info *new_reduction;
  enum tree_code reduction_code;

  gcc_assert (reduc_stmt);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file,
	       "Detected reduction. reduction stmt is: \n");
      print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  /* When REDUC_STMT is a phi (presumably the outer phi of a double
     reduction -- see the double_reduc handling in the caller; TODO
     confirm), the reduction operation is found on the rhs of the
     statement defining the phi's first argument.  Otherwise REDUC_STMT
    is the reduction assignment itself.  */
  if (gimple_code (reduc_stmt) == GIMPLE_PHI)
    {
      tree op1 = PHI_ARG_DEF (reduc_stmt, 0);
      gimple *def1 = SSA_NAME_DEF_STMT (op1);
      reduction_code = gimple_assign_rhs_code (def1);
    }

  else
    reduction_code = gimple_assign_rhs_code (reduc_stmt);

  new_reduction = XCNEW (struct reduction_info);

  new_reduction->reduc_stmt = reduc_stmt;
  new_reduction->reduc_phi = phi;
  /* The SSA version of the phi result is used as the hash key.  */
  new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
  new_reduction->reduction_code = reduction_code;
  slot = reduction_list->find_slot (new_reduction, INSERT);
  *slot = new_reduction;
}
2403
5d1fd1de
JJ
2404/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2405
4a8fb1a1
LC
2406int
2407set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 2408{
4a8fb1a1 2409 struct reduction_info *const red = *slot;
5d1fd1de
JJ
2410 gimple_set_uid (red->reduc_phi, red->reduc_version);
2411 return 1;
2412}
2413
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;
  loop_vec_info simple_inner_loop_info = NULL;
  bool allow_double_reduc = true;

  /* The vectorizer analysis routines below need stmt_vec_info_vec set up.  */
  if (!stmt_vec_info_vec.exists ())
    init_stmt_vec_info_vec ();

  simple_loop_info = vect_analyze_loop_form (loop);
  if (simple_loop_info == NULL)
    return;

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      /* Simple induction variables are not reductions.  */
      if (simple_iv (loop, loop, res, &iv, true))
	continue;

      gimple *reduc_stmt
	= vect_force_simple_reduction (simple_loop_info, phi, true,
				       &double_reduc, true);
      if (!reduc_stmt)
	continue;

      if (double_reduc)
	{
	  /* Double reductions are only handled for a two-deep loop nest,
	     and only if the inner loop form could be analyzed.  */
	  if (!allow_double_reduc
	      || loop->inner->inner != NULL)
	    continue;

	  if (!simple_inner_loop_info)
	    {
	      simple_inner_loop_info = vect_analyze_loop_form (loop->inner);
	      if (!simple_inner_loop_info)
		{
		  allow_double_reduc = false;
		  continue;
		}
	    }

	  use_operand_p use_p;
	  gimple *inner_stmt;
	  bool single_use_p = single_imm_use (res, &use_p, &inner_stmt);
	  gcc_assert (single_use_p);
	  gphi *inner_phi = as_a <gphi *> (inner_stmt);
	  if (simple_iv (loop->inner, loop->inner, PHI_RESULT (inner_phi),
			 &iv, true))
	    continue;

	  gimple *inner_reduc_stmt
	    = vect_force_simple_reduction (simple_inner_loop_info, inner_phi,
					   true, &double_reduc, true);
	  gcc_assert (!double_reduc);
	  if (inner_reduc_stmt == NULL)
	    continue;
	}

      build_new_reduction (reduction_list, reduc_stmt, phi);
    }
  destroy_loop_vec_info (simple_loop_info, true);
  destroy_loop_vec_info (simple_inner_loop_info, true);

  /* Release the claim on gimple_uid.  */
  free_stmt_vec_info_vec ();

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and free_stmt_vec_info_vec, we can set gimple_uid of reduc_phi stmts only
     now.  */
  basic_block bb;
  FOR_EACH_BB_FN (bb, cfun)
    for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
      gimple_set_uid (gsi_stmt (gsi), (unsigned int)-1);
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}
2500
2501/* Try to initialize NITER for code generation part. */
2502
2503static bool
2504try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2505{
2506 edge exit = single_dom_exit (loop);
2507
2508 gcc_assert (exit);
2509
2510 /* We need to know # of iterations, and there should be no uses of values
2511 defined inside loop outside of it, unless the values are invariants of
2512 the loop. */
2513 if (!number_of_iterations_exit (loop, exit, niter, false))
2514 {
2515 if (dump_file && (dump_flags & TDF_DETAILS))
2516 fprintf (dump_file, " FAILED: number of iterations not known\n");
2517 return false;
2518 }
2519
2520 return true;
2521}
2522
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every value live across the exit must be part of a detected reduction,
     otherwise the loop cannot be parallelized.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple *reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern: \n");
	    }
	  if (reduction_list->elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  reduc_phi = NULL;
	  /* Find the in-loop use of VAL; that should be the reduction phi.  */
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
2620
5f40b3cb
ZD
2621/* Detect parallel loops and generate parallel code using libgomp
2622 primitives. Returns true if some loop was parallelized, false
2623 otherwise. */
2624
09489eb8 2625static bool
5f40b3cb
ZD
2626parallelize_loops (void)
2627{
2628 unsigned n_threads = flag_tree_parallelize_loops;
2629 bool changed = false;
2630 struct loop *loop;
e67d7a1e 2631 struct loop *skip_loop = NULL;
5f40b3cb 2632 struct tree_niter_desc niter_desc;
f873b205 2633 struct obstack parloop_obstack;
8adfe01d 2634 HOST_WIDE_INT estimated;
b05e0233 2635 source_location loop_loc;
f873b205 2636
5f40b3cb
ZD
2637 /* Do not parallelize loops in the functions created by parallelization. */
2638 if (parallelized_function_p (cfun->decl))
2639 return false;
8adfe01d
RL
2640 if (cfun->has_nonlocal_label)
2641 return false;
5f40b3cb 2642
f873b205 2643 gcc_obstack_init (&parloop_obstack);
c203e8a7 2644 reduction_info_table_type reduction_list (10);
a509ebb5 2645
f0bd40b1 2646 FOR_EACH_LOOP (loop, 0)
5f40b3cb 2647 {
e67d7a1e
TV
2648 if (loop == skip_loop)
2649 {
2650 if (dump_file && (dump_flags & TDF_DETAILS))
2651 fprintf (dump_file,
2652 "Skipping loop %d as inner loop of parallelized loop\n",
2653 loop->num);
2654
2655 skip_loop = loop->inner;
2656 continue;
2657 }
2658 else
2659 skip_loop = NULL;
2660
4a8fb1a1 2661 reduction_list.empty ();
48710229
RL
2662 if (dump_file && (dump_flags & TDF_DETAILS))
2663 {
2664 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2665 if (loop->inner)
2666 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2667 else
2668 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2669 }
b8698a0f 2670
48710229 2671 /* If we use autopar in graphite pass, we use its marked dependency
87d4d0ee
SP
2672 checking results. */
2673 if (flag_loop_parallelize_all && !loop->can_be_parallel)
48710229
RL
2674 {
2675 if (dump_file && (dump_flags & TDF_DETAILS))
2676 fprintf (dump_file, "loop is not parallel according to graphite\n");
87d4d0ee 2677 continue;
48710229 2678 }
87d4d0ee 2679
48710229
RL
2680 if (!single_dom_exit (loop))
2681 {
b8698a0f 2682
48710229
RL
2683 if (dump_file && (dump_flags & TDF_DETAILS))
2684 fprintf (dump_file, "loop is !single_dom_exit\n");
b8698a0f 2685
08dab97a 2686 continue;
48710229 2687 }
08dab97a
RL
2688
2689 if (/* And of course, the loop must be parallelizable. */
2690 !can_duplicate_loop_p (loop)
1d4af1e8 2691 || loop_has_blocks_with_irreducible_flag (loop)
8adfe01d 2692 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
9857228c 2693 /* FIXME: the check for vector phi nodes could be removed. */
69958396 2694 || loop_has_vector_phi_nodes (loop))
08dab97a 2695 continue;
e5b332cd 2696
652c4c71 2697 estimated = estimated_stmt_executions_int (loop);
e5b332cd
RG
2698 if (estimated == -1)
2699 estimated = max_stmt_executions_int (loop);
87d4d0ee 2700 /* FIXME: Bypass this check as graphite doesn't update the
e5b332cd 2701 count and frequency correctly now. */
87d4d0ee 2702 if (!flag_loop_parallelize_all
e5b332cd
RG
2703 && ((estimated != -1
2704 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
87d4d0ee
SP
2705 /* Do not bother with loops in cold areas. */
2706 || optimize_loop_nest_for_size_p (loop)))
08dab97a 2707 continue;
b8698a0f 2708
08dab97a
RL
2709 if (!try_get_loop_niter (loop, &niter_desc))
2710 continue;
2711
c203e8a7 2712 if (!try_create_reduction_list (loop, &reduction_list))
08dab97a
RL
2713 continue;
2714
f873b205
LB
2715 if (!flag_loop_parallelize_all
2716 && !loop_parallel_p (loop, &parloop_obstack))
5f40b3cb
ZD
2717 continue;
2718
2719 changed = true;
e67d7a1e 2720 skip_loop = loop->inner;
48710229
RL
2721 if (dump_file && (dump_flags & TDF_DETAILS))
2722 {
48710229 2723 if (loop->inner)
8adfe01d 2724 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
48710229 2725 else
8adfe01d
RL
2726 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2727 loop_loc = find_loop_location (loop);
b05e0233 2728 if (loop_loc != UNKNOWN_LOCATION)
8adfe01d 2729 fprintf (dump_file, "\nloop at %s:%d: ",
b05e0233 2730 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
b8698a0f 2731 }
c203e8a7 2732 gen_parallel_loop (loop, &reduction_list,
08dab97a 2733 n_threads, &niter_desc);
5f40b3cb
ZD
2734 }
2735
f873b205 2736 obstack_free (&parloop_obstack, NULL);
6b8ed145
RG
2737
2738 /* Parallelization will cause new function calls to be inserted through
d086d311
RG
2739 which local variables will escape. Reset the points-to solution
2740 for ESCAPED. */
6b8ed145 2741 if (changed)
d086d311 2742 pt_solution_reset (&cfun->gimple_df->escaped);
6b8ed145 2743
5f40b3cb
ZD
2744 return changed;
2745}
2746
c1bf2a39
AM
2747/* Parallelization. */
2748
c1bf2a39
AM
2749namespace {
2750
2751const pass_data pass_data_parallelize_loops =
2752{
2753 GIMPLE_PASS, /* type */
2754 "parloops", /* name */
2755 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
2756 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2757 ( PROP_cfg | PROP_ssa ), /* properties_required */
2758 0, /* properties_provided */
2759 0, /* properties_destroyed */
2760 0, /* todo_flags_start */
3bea341f 2761 0, /* todo_flags_finish */
c1bf2a39
AM
2762};
2763
2764class pass_parallelize_loops : public gimple_opt_pass
2765{
2766public:
2767 pass_parallelize_loops (gcc::context *ctxt)
2768 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2769 {}
2770
2771 /* opt_pass methods: */
1a3d085c 2772 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
be55bfe6 2773 virtual unsigned int execute (function *);
c1bf2a39
AM
2774
2775}; // class pass_parallelize_loops
2776
be55bfe6
TS
2777unsigned
2778pass_parallelize_loops::execute (function *fun)
2779{
2780 if (number_of_loops (fun) <= 1)
2781 return 0;
2782
2783 if (parallelize_loops ())
18751894
TV
2784 {
2785 fun->curr_properties &= ~(PROP_gimple_eomp);
e67d7a1e 2786
b2b29377 2787 checking_verify_loop_structure ();
e67d7a1e 2788
18751894
TV
2789 return TODO_update_ssa;
2790 }
2791
be55bfe6
TS
2792 return 0;
2793}
2794
c1bf2a39
AM
2795} // anon namespace
2796
2797gimple_opt_pass *
2798make_pass_parallelize_loops (gcc::context *ctxt)
2799{
2800 return new pass_parallelize_loops (ctxt);
2801}