]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
Handle unused reduction in create_loads_for_reductions
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
40e23961 25#include "alias.h"
c7131fb2 26#include "backend.h"
9fdcd34e 27#include "cfghooks.h"
40e23961 28#include "tree.h"
c7131fb2 29#include "gimple.h"
60393bbc 30#include "hard-reg-set.h"
c7131fb2
AM
31#include "ssa.h"
32#include "options.h"
33#include "fold-const.h"
2fb9a547 34#include "internal-fn.h"
45b0be94 35#include "gimplify.h"
5be5c238 36#include "gimple-iterator.h"
18f429e2 37#include "gimplify-me.h"
5be5c238 38#include "gimple-walk.h"
d8a2d370
DN
39#include "stor-layout.h"
40#include "tree-nested.h"
442b4905 41#include "tree-cfg.h"
e28030cf
AM
42#include "tree-ssa-loop-ivopts.h"
43#include "tree-ssa-loop-manip.h"
44#include "tree-ssa-loop-niter.h"
442b4905
AM
45#include "tree-ssa-loop.h"
46#include "tree-into-ssa.h"
5f40b3cb 47#include "cfgloop.h"
5f40b3cb 48#include "tree-data-ref.h"
1bd6497c 49#include "tree-scalar-evolution.h"
cf835838 50#include "gimple-pretty-print.h"
5f40b3cb 51#include "tree-pass.h"
5f40b3cb 52#include "langhooks.h"
a509ebb5 53#include "tree-vectorizer.h"
4a8fb1a1 54#include "tree-hasher.h"
c1bf2a39 55#include "tree-parloops.h"
0645c1a2 56#include "omp-low.h"
1fe37220 57#include "tree-nested.h"
a79b7ec5 58#include "cgraph.h"
7c82d827 59#include "tree-ssa.h"
5f40b3cb
ZD
60
61/* This pass tries to distribute iterations of loops into several threads.
62 The implementation is straightforward -- for each loop we test whether its
63 iterations are independent, and if it is the case (and some additional
64 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
65 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
66 machinery do its job.
b8698a0f 67
5f40b3cb
ZD
68 The most of the complexity is in bringing the code into shape expected
69 by the omp expanders:
726a989a
RB
70 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
71 variable and that the exit test is at the start of the loop body
72 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
73 variables by accesses through pointers, and breaking up ssa chains
74 by storing the values incoming to the parallelized loop to a structure
75 passed to the new function as an argument (something similar is done
76 in omp gimplification, unfortunately only a small part of the code
77 can be shared).
78
79 TODO:
80 -- if there are several parallelizable loops in a function, it may be
81 possible to generate the threads just once (using synchronization to
82 ensure that cross-loop dependences are obeyed).
70837b71
RL
83 -- handling of common reduction patterns for outer loops.
84
85 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 86/*
a509ebb5 87 Reduction handling:
8a9ecffd 88 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 89 The code transformation will be introduced by an example.
b8698a0f
L
90
91
a509ebb5
RL
92parloop
93{
94 int sum=1;
95
0eb7e7aa 96 for (i = 0; i < N; i++)
a509ebb5
RL
97 {
98 x[i] = i + 3;
99 sum+=x[i];
100 }
101}
102
0eb7e7aa 103gimple-like code:
a509ebb5
RL
104header_bb:
105
0eb7e7aa
RL
106 # sum_29 = PHI <sum_11(5), 1(3)>
107 # i_28 = PHI <i_12(5), 0(3)>
108 D.1795_8 = i_28 + 3;
109 x[i_28] = D.1795_8;
110 sum_11 = D.1795_8 + sum_29;
111 i_12 = i_28 + 1;
112 if (N_6(D) > i_12)
113 goto header_bb;
114
a509ebb5
RL
115
116exit_bb:
117
0eb7e7aa
RL
118 # sum_21 = PHI <sum_11(4)>
119 printf (&"%d"[0], sum_21);
a509ebb5
RL
120
121
122after reduction transformation (only relevant parts):
123
124parloop
125{
126
127....
128
0eb7e7aa 129
fa10beec 130 # Storing the initial value given by the user. #
0eb7e7aa 131
ae0bce62 132 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
133
134 #pragma omp parallel num_threads(4)
a509ebb5 135
0eb7e7aa 136 #pragma omp for schedule(static)
ae0bce62
RL
137
138 # The neutral element corresponding to the particular
139 reduction's operation, e.g. 0 for PLUS_EXPR,
140 1 for MULT_EXPR, etc. replaces the user's initial value. #
141
142 # sum.27_29 = PHI <sum.27_11, 0>
143
0eb7e7aa 144 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 145
726a989a 146 GIMPLE_OMP_CONTINUE
a509ebb5 147
0eb7e7aa
RL
148 # Adding this reduction phi is done at create_phi_for_local_result() #
149 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 150 GIMPLE_OMP_RETURN
b8698a0f
L
151
152 # Creating the atomic operation is done at
0eb7e7aa 153 create_call_for_reduction_1() #
a509ebb5 154
0eb7e7aa
RL
155 #pragma omp atomic_load
156 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
157 D.1840_60 = sum.27_56 + D.1839_59;
158 #pragma omp atomic_store (D.1840_60);
b8698a0f 159
726a989a 160 GIMPLE_OMP_RETURN
b8698a0f 161
0eb7e7aa
RL
162 # collecting the result after the join of the threads is done at
163 create_loads_for_reductions().
ae0bce62
RL
164 The value computed by the threads is loaded from the
165 shared struct. #
166
b8698a0f 167
0eb7e7aa 168 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 169 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
170 sum_43 = D.1795_41 + sum_37;
171
172 exit bb:
173 # sum_21 = PHI <sum_43, sum_26>
174 printf (&"%d"[0], sum_21);
175
176...
177
a509ebb5
RL
178}
179
180*/
181
5f40b3cb
ZD
182/* Minimal number of iterations of a loop that should be executed in each
183 thread. */
184#define MIN_PER_THREAD 100
185
b8698a0f 186/* Element of the hashtable, representing a
a509ebb5
RL
187 reduction in the current loop. */
188struct reduction_info
189{
726a989a
RB
190 gimple reduc_stmt; /* reduction statement. */
191 gimple reduc_phi; /* The phi node defining the reduction. */
192 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
193 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
194 result. */
538dd0b7 195 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 196 of the reduction variable when existing the loop. */
ae0bce62 197 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 198 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5 199 tree init; /* reduction initialization value. */
538dd0b7 200 gphi *new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
201 will be passed to the atomic operation. Represents
202 the local result each thread computed for the reduction
203 operation. */
204};
205
4a8fb1a1 206/* Reduction info hashtable helpers. */
a509ebb5 207
95fbe13e 208struct reduction_hasher : free_ptr_hash <reduction_info>
a509ebb5 209{
67f58944
TS
210 static inline hashval_t hash (const reduction_info *);
211 static inline bool equal (const reduction_info *, const reduction_info *);
4a8fb1a1
LC
212};
213
214/* Equality and hash functions for hashtab code. */
a509ebb5 215
4a8fb1a1 216inline bool
67f58944 217reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
4a8fb1a1 218{
a509ebb5
RL
219 return (a->reduc_phi == b->reduc_phi);
220}
221
4a8fb1a1 222inline hashval_t
67f58944 223reduction_hasher::hash (const reduction_info *a)
a509ebb5 224{
5d1fd1de 225 return a->reduc_version;
a509ebb5
RL
226}
227
c203e8a7 228typedef hash_table<reduction_hasher> reduction_info_table_type;
4a8fb1a1
LC
229
230
a509ebb5 231static struct reduction_info *
c203e8a7 232reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
a509ebb5
RL
233{
234 struct reduction_info tmpred, *red;
235
c203e8a7 236 if (reduction_list->elements () == 0 || phi == NULL)
a509ebb5
RL
237 return NULL;
238
239 tmpred.reduc_phi = phi;
5d1fd1de 240 tmpred.reduc_version = gimple_uid (phi);
c203e8a7 241 red = reduction_list->find (&tmpred);
a509ebb5
RL
242
243 return red;
244}
245
5f40b3cb
ZD
246/* Element of hashtable of names to copy. */
247
248struct name_to_copy_elt
249{
250 unsigned version; /* The version of the name to copy. */
251 tree new_name; /* The new name used in the copy. */
252 tree field; /* The field of the structure used to pass the
253 value. */
254};
255
4a8fb1a1 256/* Name copies hashtable helpers. */
5f40b3cb 257
95fbe13e 258struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
5f40b3cb 259{
67f58944
TS
260 static inline hashval_t hash (const name_to_copy_elt *);
261 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
4a8fb1a1
LC
262};
263
264/* Equality and hash functions for hashtab code. */
5f40b3cb 265
4a8fb1a1 266inline bool
67f58944 267name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
4a8fb1a1 268{
5f40b3cb
ZD
269 return a->version == b->version;
270}
271
4a8fb1a1 272inline hashval_t
67f58944 273name_to_copy_hasher::hash (const name_to_copy_elt *a)
5f40b3cb 274{
5f40b3cb
ZD
275 return (hashval_t) a->version;
276}
277
c203e8a7 278typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
4a8fb1a1 279
b305e3da
SP
280/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
281 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
282 represents the denominator for every element in the matrix. */
283typedef struct lambda_trans_matrix_s
284{
285 lambda_matrix matrix;
286 int rowsize;
287 int colsize;
288 int denominator;
289} *lambda_trans_matrix;
290#define LTM_MATRIX(T) ((T)->matrix)
291#define LTM_ROWSIZE(T) ((T)->rowsize)
292#define LTM_COLSIZE(T) ((T)->colsize)
293#define LTM_DENOMINATOR(T) ((T)->denominator)
294
295/* Allocate a new transformation matrix. */
296
297static lambda_trans_matrix
298lambda_trans_matrix_new (int colsize, int rowsize,
299 struct obstack * lambda_obstack)
300{
301 lambda_trans_matrix ret;
302
303 ret = (lambda_trans_matrix)
304 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
305 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
306 LTM_ROWSIZE (ret) = rowsize;
307 LTM_COLSIZE (ret) = colsize;
308 LTM_DENOMINATOR (ret) = 1;
309 return ret;
310}
311
312/* Multiply a vector VEC by a matrix MAT.
313 MAT is an M*N matrix, and VEC is a vector with length N. The result
314 is stored in DEST which must be a vector of length M. */
315
316static void
317lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
318 lambda_vector vec, lambda_vector dest)
319{
320 int i, j;
321
322 lambda_vector_clear (dest, m);
323 for (i = 0; i < m; i++)
324 for (j = 0; j < n; j++)
325 dest[i] += matrix[i][j] * vec[j];
326}
327
328/* Return true if TRANS is a legal transformation matrix that respects
329 the dependence vectors in DISTS and DIRS. The conservative answer
330 is false.
331
332 "Wolfe proves that a unimodular transformation represented by the
333 matrix T is legal when applied to a loop nest with a set of
334 lexicographically non-negative distance vectors RDG if and only if
335 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
336 i.e.: if and only if it transforms the lexicographically positive
337 distance vectors to lexicographically positive vectors. Note that
338 a unimodular matrix must transform the zero vector (and only it) to
339 the zero vector." S.Muchnick. */
340
341static bool
342lambda_transform_legal_p (lambda_trans_matrix trans,
343 int nb_loops,
9771b263 344 vec<ddr_p> dependence_relations)
b305e3da
SP
345{
346 unsigned int i, j;
347 lambda_vector distres;
348 struct data_dependence_relation *ddr;
349
350 gcc_assert (LTM_COLSIZE (trans) == nb_loops
351 && LTM_ROWSIZE (trans) == nb_loops);
352
353 /* When there are no dependences, the transformation is correct. */
9771b263 354 if (dependence_relations.length () == 0)
b305e3da
SP
355 return true;
356
9771b263 357 ddr = dependence_relations[0];
b305e3da
SP
358 if (ddr == NULL)
359 return true;
360
361 /* When there is an unknown relation in the dependence_relations, we
362 know that it is no worth looking at this loop nest: give up. */
363 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
364 return false;
365
366 distres = lambda_vector_new (nb_loops);
367
368 /* For each distance vector in the dependence graph. */
9771b263 369 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
b305e3da
SP
370 {
371 /* Don't care about relations for which we know that there is no
372 dependence, nor about read-read (aka. output-dependences):
373 these data accesses can happen in any order. */
374 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
375 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
376 continue;
377
378 /* Conservatively answer: "this transformation is not valid". */
379 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
380 return false;
381
382 /* If the dependence could not be captured by a distance vector,
383 conservatively answer that the transform is not valid. */
384 if (DDR_NUM_DIST_VECTS (ddr) == 0)
385 return false;
386
387 /* Compute trans.dist_vect */
388 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
389 {
390 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
391 DDR_DIST_VECT (ddr, j), distres);
392
393 if (!lambda_vector_lexico_pos (distres, nb_loops))
394 return false;
395 }
396 }
397 return true;
398}
08dab97a
RL
399
400/* Data dependency analysis. Returns true if the iterations of LOOP
401 are independent on each other (that is, if we can execute them
402 in parallel). */
5f40b3cb
ZD
403
404static bool
f873b205 405loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 406{
9771b263
DN
407 vec<ddr_p> dependence_relations;
408 vec<data_reference_p> datarefs;
5f40b3cb
ZD
409 lambda_trans_matrix trans;
410 bool ret = false;
5f40b3cb
ZD
411
412 if (dump_file && (dump_flags & TDF_DETAILS))
48710229
RL
413 {
414 fprintf (dump_file, "Considering loop %d\n", loop->num);
415 if (!loop->inner)
416 fprintf (dump_file, "loop is innermost\n");
b8698a0f 417 else
48710229
RL
418 fprintf (dump_file, "loop NOT innermost\n");
419 }
5f40b3cb 420
5f40b3cb
ZD
421 /* Check for problems with dependences. If the loop can be reversed,
422 the iterations are independent. */
00f96dc9 423 auto_vec<loop_p, 3> loop_nest;
9771b263 424 datarefs.create (10);
07687835 425 dependence_relations.create (100);
9ca3d00e
AB
426 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
427 &dependence_relations))
428 {
429 if (dump_file && (dump_flags & TDF_DETAILS))
430 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
431 ret = false;
432 goto end;
433 }
5f40b3cb
ZD
434 if (dump_file && (dump_flags & TDF_DETAILS))
435 dump_data_dependence_relations (dump_file, dependence_relations);
436
f873b205 437 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
5f40b3cb
ZD
438 LTM_MATRIX (trans)[0][0] = -1;
439
440 if (lambda_transform_legal_p (trans, 1, dependence_relations))
441 {
442 ret = true;
443 if (dump_file && (dump_flags & TDF_DETAILS))
444 fprintf (dump_file, " SUCCESS: may be parallelized\n");
445 }
446 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
447 fprintf (dump_file,
448 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 449
9ca3d00e 450 end:
5f40b3cb
ZD
451 free_dependence_relations (dependence_relations);
452 free_data_refs (datarefs);
453
454 return ret;
455}
456
1d4af1e8
SP
457/* Return true when LOOP contains basic blocks marked with the
458 BB_IRREDUCIBLE_LOOP flag. */
459
460static inline bool
461loop_has_blocks_with_irreducible_flag (struct loop *loop)
462{
463 unsigned i;
464 basic_block *bbs = get_loop_body_in_dom_order (loop);
465 bool res = true;
466
467 for (i = 0; i < loop->num_nodes; i++)
468 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
469 goto end;
470
471 res = false;
472 end:
473 free (bbs);
474 return res;
475}
476
8a171a59 477/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 478 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 479 to their addresses that can be reused. The address of OBJ is known to
cba1eb61
JJ
480 be invariant in the whole function. Other needed statements are placed
481 right before GSI. */
5f40b3cb
ZD
482
483static tree
4a8fb1a1 484take_address_of (tree obj, tree type, edge entry,
c203e8a7 485 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
5f40b3cb 486{
8a171a59 487 int uid;
83d5977e 488 tree *var_p, name, addr;
538dd0b7 489 gassign *stmt;
726a989a 490 gimple_seq stmts;
5f40b3cb 491
8a171a59
ZD
492 /* Since the address of OBJ is invariant, the trees may be shared.
493 Avoid rewriting unrelated parts of the code. */
494 obj = unshare_expr (obj);
495 for (var_p = &obj;
496 handled_component_p (*var_p);
497 var_p = &TREE_OPERAND (*var_p, 0))
498 continue;
8a171a59 499
c9a410f0
RG
500 /* Canonicalize the access to base on a MEM_REF. */
501 if (DECL_P (*var_p))
502 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
503
504 /* Assign a canonical SSA name to the address of the base decl used
505 in the address and share it for all accesses and addresses based
506 on it. */
507 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
84baa4b9
TS
508 int_tree_map elt;
509 elt.uid = uid;
510 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
511 if (!slot->to)
5f40b3cb 512 {
cba1eb61
JJ
513 if (gsi == NULL)
514 return NULL;
c9a410f0 515 addr = TREE_OPERAND (*var_p, 0);
29b89442
JJ
516 const char *obj_name
517 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
518 if (obj_name)
519 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
520 else
b731b390 521 name = make_ssa_name (TREE_TYPE (addr));
83d5977e 522 stmt = gimple_build_assign (name, addr);
726a989a 523 gsi_insert_on_edge_immediate (entry, stmt);
5f40b3cb 524
84baa4b9
TS
525 slot->uid = uid;
526 slot->to = name;
5f40b3cb 527 }
8a171a59 528 else
84baa4b9 529 name = slot->to;
5f40b3cb 530
c9a410f0
RG
531 /* Express the address in terms of the canonical SSA name. */
532 TREE_OPERAND (*var_p, 0) = name;
cba1eb61
JJ
533 if (gsi == NULL)
534 return build_fold_addr_expr_with_type (obj, type);
535
c9a410f0
RG
536 name = force_gimple_operand (build_addr (obj, current_function_decl),
537 &stmts, true, NULL_TREE);
538 if (!gimple_seq_empty_p (stmts))
cba1eb61 539 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 540
c9a410f0 541 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 542 {
726a989a 543 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 544 NULL_TREE);
726a989a 545 if (!gimple_seq_empty_p (stmts))
cba1eb61 546 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 547 }
5f40b3cb
ZD
548
549 return name;
550}
551
a509ebb5 552/* Callback for htab_traverse. Create the initialization statement
b8698a0f 553 for reduction described in SLOT, and place it at the preheader of
a509ebb5
RL
554 the loop described in DATA. */
555
4a8fb1a1
LC
556int
557initialize_reductions (reduction_info **slot, struct loop *loop)
a509ebb5 558{
a509ebb5 559 tree init, c;
a509ebb5
RL
560 tree bvar, type, arg;
561 edge e;
562
4a8fb1a1 563 struct reduction_info *const reduc = *slot;
a509ebb5 564
b8698a0f 565 /* Create initialization in preheader:
a509ebb5
RL
566 reduction_variable = initialization value of reduction. */
567
b8698a0f 568 /* In the phi node at the header, replace the argument coming
a509ebb5
RL
569 from the preheader with the reduction initialization value. */
570
571 /* Create a new variable to initialize the reduction. */
572 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
573 bvar = create_tmp_var (type, "reduction");
a509ebb5 574
c2255bc4
AH
575 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
576 OMP_CLAUSE_REDUCTION);
a509ebb5 577 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
726a989a 578 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
a509ebb5
RL
579
580 init = omp_reduction_init (c, TREE_TYPE (bvar));
581 reduc->init = init;
582
b8698a0f
L
583 /* Replace the argument representing the initialization value
584 with the initialization value for the reduction (neutral
585 element for the particular operation, e.g. 0 for PLUS_EXPR,
586 1 for MULT_EXPR, etc).
587 Keep the old value in a new variable "reduction_initial",
588 that will be taken in consideration after the parallel
0eb7e7aa 589 computing is done. */
a509ebb5
RL
590
591 e = loop_preheader_edge (loop);
592 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
593 /* Create new variable to hold the initial value. */
a509ebb5 594
a509ebb5 595 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 596 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 597 reduc->initial_value = arg;
a509ebb5
RL
598 return 1;
599}
5f40b3cb
ZD
600
601struct elv_data
602{
726a989a 603 struct walk_stmt_info info;
9f9f72aa 604 edge entry;
c203e8a7 605 int_tree_htab_type *decl_address;
cba1eb61 606 gimple_stmt_iterator *gsi;
5f40b3cb 607 bool changed;
cba1eb61 608 bool reset;
5f40b3cb
ZD
609};
610
9f9f72aa
AP
611/* Eliminates references to local variables in *TP out of the single
612 entry single exit region starting at DTA->ENTRY.
613 DECL_ADDRESS contains addresses of the references that had their
614 address taken already. If the expression is changed, CHANGED is
615 set to true. Callback for walk_tree. */
a509ebb5 616
5f40b3cb 617static tree
8a171a59 618eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 619{
3d9a9f94 620 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 621 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
622
623 if (DECL_P (t))
624 {
625 *walk_subtrees = 0;
626
627 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
628 return NULL_TREE;
629
630 type = TREE_TYPE (t);
631 addr_type = build_pointer_type (type);
cba1eb61
JJ
632 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
633 dta->gsi);
634 if (dta->gsi == NULL && addr == NULL_TREE)
635 {
636 dta->reset = true;
637 return NULL_TREE;
638 }
639
70f34814 640 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
641
642 dta->changed = true;
643 return NULL_TREE;
644 }
645
646 if (TREE_CODE (t) == ADDR_EXPR)
647 {
8a171a59
ZD
648 /* ADDR_EXPR may appear in two contexts:
649 -- as a gimple operand, when the address taken is a function invariant
650 -- as gimple rhs, when the resulting address in not a function
651 invariant
652 We do not need to do anything special in the latter case (the base of
653 the memory reference whose address is taken may be replaced in the
654 DECL_P case). The former case is more complicated, as we need to
655 ensure that the new address is still a gimple operand. Thus, it
656 is not sufficient to replace just the base of the memory reference --
657 we need to move the whole computation of the address out of the
658 loop. */
659 if (!is_gimple_val (t))
5f40b3cb
ZD
660 return NULL_TREE;
661
662 *walk_subtrees = 0;
8a171a59
ZD
663 obj = TREE_OPERAND (t, 0);
664 var = get_base_address (obj);
665 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
666 return NULL_TREE;
667
668 addr_type = TREE_TYPE (t);
cba1eb61
JJ
669 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
670 dta->gsi);
671 if (dta->gsi == NULL && addr == NULL_TREE)
672 {
673 dta->reset = true;
674 return NULL_TREE;
675 }
5f40b3cb
ZD
676 *tp = addr;
677
678 dta->changed = true;
679 return NULL_TREE;
680 }
681
726a989a 682 if (!EXPR_P (t))
5f40b3cb
ZD
683 *walk_subtrees = 0;
684
685 return NULL_TREE;
686}
687
cba1eb61 688/* Moves the references to local variables in STMT at *GSI out of the single
9f9f72aa
AP
689 entry single exit region starting at ENTRY. DECL_ADDRESS contains
690 addresses of the references that had their address taken
691 already. */
5f40b3cb
ZD
692
693static void
cba1eb61 694eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
c203e8a7 695 int_tree_htab_type *decl_address)
5f40b3cb
ZD
696{
697 struct elv_data dta;
cba1eb61 698 gimple stmt = gsi_stmt (*gsi);
5f40b3cb 699
726a989a 700 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 701 dta.entry = entry;
5f40b3cb
ZD
702 dta.decl_address = decl_address;
703 dta.changed = false;
cba1eb61 704 dta.reset = false;
5f40b3cb 705
b5b8b0ac 706 if (gimple_debug_bind_p (stmt))
cba1eb61
JJ
707 {
708 dta.gsi = NULL;
709 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
710 eliminate_local_variables_1, &dta.info, NULL);
711 if (dta.reset)
712 {
713 gimple_debug_bind_reset_value (stmt);
714 dta.changed = true;
715 }
716 }
29b89442
JJ
717 else if (gimple_clobber_p (stmt))
718 {
719 stmt = gimple_build_nop ();
720 gsi_replace (gsi, stmt, false);
721 dta.changed = true;
722 }
b5b8b0ac 723 else
cba1eb61
JJ
724 {
725 dta.gsi = gsi;
726 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
727 }
5f40b3cb
ZD
728
729 if (dta.changed)
730 update_stmt (stmt);
731}
732
9f9f72aa
AP
733/* Eliminates the references to local variables from the single entry
734 single exit region between the ENTRY and EXIT edges.
b8698a0f 735
a509ebb5 736 This includes:
b8698a0f
L
737 1) Taking address of a local variable -- these are moved out of the
738 region (and temporary variable is created to hold the address if
a509ebb5 739 necessary).
9f9f72aa 740
5f40b3cb 741 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 742 references. */
5f40b3cb
ZD
743
744static void
9f9f72aa 745eliminate_local_variables (edge entry, edge exit)
5f40b3cb 746{
9f9f72aa 747 basic_block bb;
00f96dc9 748 auto_vec<basic_block, 3> body;
5f40b3cb 749 unsigned i;
726a989a 750 gimple_stmt_iterator gsi;
cba1eb61 751 bool has_debug_stmt = false;
c203e8a7 752 int_tree_htab_type decl_address (10);
9f9f72aa
AP
753 basic_block entry_bb = entry->src;
754 basic_block exit_bb = exit->dest;
5f40b3cb 755
9f9f72aa 756 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 757
9771b263 758 FOR_EACH_VEC_ELT (body, i, bb)
9f9f72aa 759 if (bb != entry_bb && bb != exit_bb)
726a989a 760 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
ddb555ed
JJ
761 if (is_gimple_debug (gsi_stmt (gsi)))
762 {
763 if (gimple_debug_bind_p (gsi_stmt (gsi)))
764 has_debug_stmt = true;
765 }
cba1eb61 766 else
c203e8a7 767 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
cba1eb61
JJ
768
769 if (has_debug_stmt)
9771b263 770 FOR_EACH_VEC_ELT (body, i, bb)
cba1eb61
JJ
771 if (bb != entry_bb && bb != exit_bb)
772 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
773 if (gimple_debug_bind_p (gsi_stmt (gsi)))
c203e8a7 774 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
9f9f72aa
AP
775}
776
777/* Returns true if expression EXPR is not defined between ENTRY and
778 EXIT, i.e. if all its operands are defined outside of the region. */
779
780static bool
781expr_invariant_in_region_p (edge entry, edge exit, tree expr)
782{
783 basic_block entry_bb = entry->src;
784 basic_block exit_bb = exit->dest;
785 basic_block def_bb;
9f9f72aa
AP
786
787 if (is_gimple_min_invariant (expr))
788 return true;
789
790 if (TREE_CODE (expr) == SSA_NAME)
791 {
726a989a 792 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
793 if (def_bb
794 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
795 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
796 return false;
797
798 return true;
799 }
800
726a989a 801 return false;
5f40b3cb
ZD
802}
803
804/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
805 The copies are stored to NAME_COPIES, if NAME was already duplicated,
806 its duplicate stored in NAME_COPIES is returned.
b8698a0f 807
5f40b3cb
ZD
808 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
809 duplicated, storing the copies in DECL_COPIES. */
810
811static tree
c203e8a7
TS
812separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
813 int_tree_htab_type *decl_copies,
814 bool copy_name_p)
5f40b3cb
ZD
815{
816 tree copy, var, var_copy;
817 unsigned idx, uid, nuid;
84baa4b9 818 struct int_tree_map ielt;
5f40b3cb 819 struct name_to_copy_elt elt, *nelt;
4a8fb1a1 820 name_to_copy_elt **slot;
84baa4b9 821 int_tree_map *dslot;
5f40b3cb
ZD
822
823 if (TREE_CODE (name) != SSA_NAME)
824 return name;
825
826 idx = SSA_NAME_VERSION (name);
827 elt.version = idx;
c203e8a7
TS
828 slot = name_copies->find_slot_with_hash (&elt, idx,
829 copy_name_p ? INSERT : NO_INSERT);
5f40b3cb 830 if (slot && *slot)
4a8fb1a1 831 return (*slot)->new_name;
5f40b3cb 832
70b5e7dc
RG
833 if (copy_name_p)
834 {
835 copy = duplicate_ssa_name (name, NULL);
836 nelt = XNEW (struct name_to_copy_elt);
837 nelt->version = idx;
838 nelt->new_name = copy;
839 nelt->field = NULL_TREE;
840 *slot = nelt;
841 }
842 else
843 {
844 gcc_assert (!slot);
845 copy = name;
846 }
847
5f40b3cb 848 var = SSA_NAME_VAR (name);
70b5e7dc
RG
849 if (!var)
850 return copy;
851
5f40b3cb
ZD
852 uid = DECL_UID (var);
853 ielt.uid = uid;
84baa4b9
TS
854 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
855 if (!dslot->to)
5f40b3cb
ZD
856 {
857 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 858 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
84baa4b9
TS
859 dslot->uid = uid;
860 dslot->to = var_copy;
5f40b3cb
ZD
861
862 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 863 it again. */
5f40b3cb
ZD
864 nuid = DECL_UID (var_copy);
865 ielt.uid = nuid;
84baa4b9
TS
866 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
867 gcc_assert (!dslot->to);
868 dslot->uid = nuid;
869 dslot->to = var_copy;
5f40b3cb
ZD
870 }
871 else
84baa4b9 872 var_copy = dslot->to;
5f40b3cb 873
b2ec94d4 874 replace_ssa_name_symbol (copy, var_copy);
5f40b3cb
ZD
875 return copy;
876}
877
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  /* Defs are produced inside the region, so they keep their SSA name;
     only their underlying base decl gets duplicated (copy_name_p is
     false, and the helper must hand back the same name).  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
  {
    name = DEF_FROM_PTR (def);
    gcc_assert (TREE_CODE (name) == SSA_NAME);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  false);
    gcc_assert (copy == name);
  }

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
  {
    name = USE_FROM_PTR (use);
    if (TREE_CODE (name) != SSA_NAME)
      continue;

    /* A use that is invariant in the region is defined outside of it;
       such a name gets a fresh SSA copy (to be filled in from the
       shared data structure), and the use is redirected to it.  */
    copy_name_p = expr_invariant_in_region_p (entry, exit, name);
    copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					  copy_name_p);
    SET_USE (use, copy);
  }
}
917
/* Processes debug bind (or debug source bind) statement STMT for
   separate_decls_in_region: replaces the bound variable with its
   duplicate from DECL_COPIES and the ssa names used in the bound
   value with their duplicates from NAME_COPIES.  Returns true if the
   debug statement should be deleted (no duplicate of the variable is
   available, so nothing useful can be preserved); returns false
   otherwise.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  /* Debug-expr and label decls are never duplicated; drop the stmt.  */
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  /* NO_INSERT: we only look up copies made earlier; if the decl was
     never duplicated, the bind cannot be retargeted — delete it.  */
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* Some used name has no duplicate: keep the bind (so the
	     variable is known) but reset its value to "unknown".  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
976
/* Callback for htab_traverse.  Adds a field corresponding to the reduction
   specified in SLOT.  The type is passed in DATA.  */

int
add_field_for_reduction (reduction_info **slot, tree type)
{

  struct reduction_info *const red = *slot;
  /* Name and type the field after the reduction result, so the layout
     of the shared structure is recognizable in dumps.  */
  tree var = gimple_assign_lhs (red->reduc_stmt);
  tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
			   SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));

  insert_field_into_struct (type, field);

  /* Remember the field so loads/stores of the reduction can find it.  */
  red->field = field;

  return 1;
}
a509ebb5 995
/* Callback for htab_traverse.  Adds a field corresponding to a ssa name
   described in SLOT.  The type is passed in DATA.  */

int
add_field_for_name (name_to_copy_elt **slot, tree type)
{
  struct name_to_copy_elt *const elt = *slot;
  tree name = ssa_name (elt->version);
  /* The field carries the SSA name's identifier and type; location is
     irrelevant for the artificial shared structure.  */
  tree field = build_decl (UNKNOWN_LOCATION,
			   FIELD_DECL, SSA_NAME_IDENTIFIER (name),
			   TREE_TYPE (name));

  insert_field_into_struct (type, field);
  elt->field = field;

  return 1;
}
1013
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  /* The phi result is a fresh version of the reduction variable.  */
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  /* No-iteration path contributes the initial value; the loop path
     contributes the value accumulated by this thread.  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
5f40b3cb
ZD
1055
/* Describes where the data shared between the threads lives and where
   the stores to / loads from it should be emitted.  */

struct clsn_data
{
  tree store;		/* The shared structure written before the loop.  */
  tree load;		/* SSA name of the pointer it is read back through.  */

  basic_block store_bb;	/* Block in which the stores are generated.  */
  basic_block load_bb;	/* Block in which the loads are generated.  */
};
1064
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Compute the address of this reduction's field within the shared
     structure; the atomic update is performed on that location.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  /* Atomically load the current value of the field ...  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* ... combine it with this thread's local result using the
     reduction operation ...  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* ... and atomically store the combined value back.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
1117
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First create the phis carrying each thread's local result ...  */
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  /* ... then accumulate them into the shared structure atomically.  */
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
1133
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* If there's no exit phi, the result of the reduction is unused.  */
  if (red->keep_res == NULL)
    return 1;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  /* Assign the loaded field directly to the result of the exit phi,
     which then becomes redundant.  */
  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The exit phi is now superseded by the load; find and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
1172
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  /* Initialize the load pointer with the address of the shared
     structure, before any of the per-reduction loads.  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);

}
1194
/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  /* Emit "store.field = initial_value" at the end of the store block,
     i.e. before the threads are spawned.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
1217
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  /* In the store block: "store.field = original_name".  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* In the load block: "new_name = load->field".  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
1246
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rename decls / duplicate outside-defined names in all
     non-debug statements of the region.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      /* True return means the bind cannot be retargeted;
			 delete it.  */
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      /* NOTE(review): reduction_list is dereferenced unconditionally
	 above, so the null test here (and below) looks redundant —
	 confirm before relying on it.  */
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
1406
/* Returns true if FNDECL was created to run in parallel (i.e. it is an
   outlined loop body produced by this pass).  */

bool
parallelized_function_p (tree fndecl)
{
  /* The flag lives on the cgraph node of the function.  */
  cgraph_node *node = cgraph_node::get (fndecl);
  gcc_assert (node != NULL);
  return node->parallelized_function;
}
1416
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  /* Generate a unique private name derived from the current function:
     "<fn>.$loopfn.<n>".  */
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* The outlined function takes a single pointer (to the shared data
     structure) and returns nothing.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Void result.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single parameter carrying the shared data.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1468
/* Replace uses of NAME by VAL in block BB.  */

static void
replace_uses_in_bb_by (tree name, tree val, basic_block bb)
{
  gimple use_stmt;
  imm_use_iterator imm_iter;

  FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
    {
      /* Only statements residing in BB are affected.  */
      if (gimple_bb (use_stmt) != bb)
	continue;

      use_operand_p use_p;
      FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
	SET_USE (use_p, val);
    }
}
1487
/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

     [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
	 that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>


   In unified diff format:

      <bb preheader>:
      ...
-     goto <bb header>
+     goto <bb newheader>

      <bb header>:
-     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
-     sum_a = PHI <sum_init (preheader), sum_b (latch)>
+     ivtmp_a = PHI <ivtmp_c (latch)>
+     sum_a = PHI <sum_c (latch)>
      ...
      use (ivtmp_a)
      ...
      sum_b = sum_a + sum_update
      ...
-     if (ivtmp_a < n)
-       goto <bb latch>;
+     goto <bb latch>;
+
+     <bb newheader>:
+     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
+     sum_c = PHI <sum_init (preheader), sum_b (latch)>
+     if (ivtmp_c < n + 1)
+       goto <bb header>;
      else
	goto <bb exit>;

      <bb latch>:
      ivtmp_b = ivtmp_a + 1;
-     goto <bb header>
+     goto <bb newheader>

+    <bb newexit>:
+    sum_y = PHI <sum_c (newheader)>

      <bb exit>:
-     sum_z = PHI <sum_b (cond[1]), ...>
+     sum_z = PHI <sum_y (newexit), ...>

   Note: the example does not show any virtual phis, but these are handled more
   or less as reductions.


   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */

static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal from after the transformation, which is required by
     create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  The header phis are walked in lockstep with the
     var map of the redirected latch edge; every original header phi
     gets a counterpart phi in the new header.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      /* Every header phi must be a virtual, the control iv, or a known
	 reduction; anything else would be miscompiled.  */
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  /* Create a new empty exit block, inbetween the new loop header and the old
     exit block.  The function separate_decls_in_region needs this block to
     insert code that is active on loop exit, but not any other path.  */
  basic_block new_exit_block = split_edge (exit);

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree res_z = PHI_RESULT (phi);

      /* Now that we have a new exit block, duplicate the phi of the old exit
	 block in the new exit block to preserve loop-closed ssa.  */
      edge succ_new_exit_block = single_succ_edge (new_exit_block);
      edge pred_new_exit_block = single_pred_edge (new_exit_block);
      tree res_y = copy_ssa_name (res_z, phi);
      gphi *nphi = create_phi_node (res_y, new_exit_block);
      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
      add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
      add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);

      if (virtual_operand_p (res_z))
	continue;

      gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = nphi;
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}
1736
/* Tries to moves the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of the
   loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true if
   transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
	{
	  /* nit + 1 folds to a constant; use it as the new bound.  */
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
  if (!wi::lts_p (nit_max, type_max))
    return false;

  gimple def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* If not found, insert nit + 1.  */
  if (alt_bound == NULL_TREE)
    {
      alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
			       build_int_cst_type (nit_type, 1));

      gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);

      alt_bound
	= force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
				    GSI_CONTINUE_LINKING);
    }

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
1852
1853/* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1854 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1855 LOOP. */
5f40b3cb
ZD
1856
1857static void
4a8fb1a1 1858transform_to_exit_first_loop (struct loop *loop,
c203e8a7 1859 reduction_info_table_type *reduction_list,
4a8fb1a1 1860 tree nit)
5f40b3cb
ZD
1861{
1862 basic_block *bbs, *nbbs, ex_bb, orig_header;
1863 unsigned n;
1864 bool ok;
1865 edge exit = single_dom_exit (loop), hpred;
726a989a 1866 tree control, control_name, res, t;
538dd0b7
DM
1867 gphi *phi, *nphi;
1868 gassign *stmt;
1869 gcond *cond_stmt, *cond_nit;
48710229 1870 tree nit_1;
5f40b3cb
ZD
1871
1872 split_block_after_labels (loop->header);
1873 orig_header = single_succ (loop->header);
1874 hpred = single_succ_edge (loop->header);
1875
538dd0b7 1876 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
726a989a
RB
1877 control = gimple_cond_lhs (cond_stmt);
1878 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
5f40b3cb
ZD
1879
1880 /* Make sure that we have phi nodes on exit for all loop header phis
1881 (create_parallel_loop requires that). */
538dd0b7
DM
1882 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1883 !gsi_end_p (gsi);
1884 gsi_next (&gsi))
5f40b3cb 1885 {
538dd0b7 1886 phi = gsi.phi ();
5f40b3cb 1887 res = PHI_RESULT (phi);
070ecdfd 1888 t = copy_ssa_name (res, phi);
5f40b3cb 1889 SET_PHI_RESULT (phi, t);
5f40b3cb 1890 nphi = create_phi_node (res, orig_header);
9e227d60 1891 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
5f40b3cb
ZD
1892
1893 if (res == control)
1894 {
726a989a 1895 gimple_cond_set_lhs (cond_stmt, t);
5f40b3cb
ZD
1896 update_stmt (cond_stmt);
1897 control = t;
1898 }
1899 }
12037899 1900
5f40b3cb 1901 bbs = get_loop_body_in_dom_order (loop);
48710229 1902
69958396
RL
1903 for (n = 0; bbs[n] != exit->src; n++)
1904 continue;
5f40b3cb 1905 nbbs = XNEWVEC (basic_block, n);
726a989a
RB
1906 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1907 bbs + 1, n, nbbs);
5f40b3cb
ZD
1908 gcc_assert (ok);
1909 free (bbs);
1910 ex_bb = nbbs[0];
1911 free (nbbs);
1912
b8698a0f 1913 /* Other than reductions, the only gimple reg that should be copied
726a989a 1914 out of the loop is the control variable. */
69958396 1915 exit = single_dom_exit (loop);
5f40b3cb 1916 control_name = NULL_TREE;
538dd0b7
DM
1917 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1918 !gsi_end_p (gsi); )
5f40b3cb 1919 {
538dd0b7 1920 phi = gsi.phi ();
5f40b3cb 1921 res = PHI_RESULT (phi);
ea057359 1922 if (virtual_operand_p (res))
726a989a
RB
1923 {
1924 gsi_next (&gsi);
1925 continue;
1926 }
5f40b3cb 1927
a509ebb5 1928 /* Check if it is a part of reduction. If it is,
b8698a0f
L
1929 keep the phi at the reduction's keep_res field. The
1930 PHI_RESULT of this phi is the resulting value of the reduction
a509ebb5
RL
1931 variable when exiting the loop. */
1932
c203e8a7 1933 if (reduction_list->elements () > 0)
a509ebb5
RL
1934 {
1935 struct reduction_info *red;
1936
1937 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
a509ebb5
RL
1938 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1939 if (red)
726a989a
RB
1940 {
1941 red->keep_res = phi;
1942 gsi_next (&gsi);
1943 continue;
1944 }
a509ebb5 1945 }
726a989a
RB
1946 gcc_assert (control_name == NULL_TREE
1947 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb 1948 control_name = res;
726a989a 1949 remove_phi_node (&gsi, false);
5f40b3cb
ZD
1950 }
1951 gcc_assert (control_name != NULL_TREE);
5f40b3cb 1952
b8698a0f 1953 /* Initialize the control variable to number of iterations
48710229 1954 according to the rhs of the exit condition. */
538dd0b7
DM
1955 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1956 cond_nit = as_a <gcond *> (last_stmt (exit->src));
48710229
RL
1957 nit_1 = gimple_cond_rhs (cond_nit);
1958 nit_1 = force_gimple_operand_gsi (&gsi,
1959 fold_convert (TREE_TYPE (control_name), nit_1),
726a989a 1960 false, NULL_TREE, false, GSI_SAME_STMT);
48710229 1961 stmt = gimple_build_assign (control_name, nit_1);
726a989a 1962 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1963}
1964
1965/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
726a989a 1966 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
5f40b3cb
ZD
1967 NEW_DATA is the variable that should be initialized from the argument
1968 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
726a989a 1969 basic block containing GIMPLE_OMP_PARALLEL tree. */
5f40b3cb
ZD
1970
1971static basic_block
1972create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
9ff70652 1973 tree new_data, unsigned n_threads, location_t loc)
5f40b3cb 1974{
726a989a 1975 gimple_stmt_iterator gsi;
5f40b3cb 1976 basic_block bb, paral_bb, for_bb, ex_bb;
0f900dfa 1977 tree t, param;
538dd0b7
DM
1978 gomp_parallel *omp_par_stmt;
1979 gimple omp_return_stmt1, omp_return_stmt2;
1980 gimple phi;
1981 gcond *cond_stmt;
1982 gomp_for *for_stmt;
1983 gomp_continue *omp_cont_stmt;
726a989a 1984 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
5f40b3cb
ZD
1985 edge exit, nexit, guard, end, e;
1986
726a989a 1987 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
5f40b3cb
ZD
1988 bb = loop_preheader_edge (loop)->src;
1989 paral_bb = single_pred (bb);
726a989a 1990 gsi = gsi_last_bb (paral_bb);
5f40b3cb 1991
9ff70652 1992 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
5f40b3cb 1993 OMP_CLAUSE_NUM_THREADS_EXPR (t)
a509ebb5 1994 = build_int_cst (integer_type_node, n_threads);
538dd0b7
DM
1995 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1996 gimple_set_location (omp_par_stmt, loc);
5f40b3cb 1997
538dd0b7 1998 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1999
2000 /* Initialize NEW_DATA. */
2001 if (data)
2002 {
538dd0b7
DM
2003 gassign *assign_stmt;
2004
726a989a
RB
2005 gsi = gsi_after_labels (bb);
2006
b731b390 2007 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
538dd0b7
DM
2008 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2009 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
726a989a 2010
538dd0b7 2011 assign_stmt = gimple_build_assign (new_data,
726a989a 2012 fold_convert (TREE_TYPE (new_data), param));
538dd0b7 2013 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5f40b3cb
ZD
2014 }
2015
726a989a 2016 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
5f40b3cb 2017 bb = split_loop_exit_edge (single_dom_exit (loop));
726a989a 2018 gsi = gsi_last_bb (bb);
538dd0b7
DM
2019 omp_return_stmt1 = gimple_build_omp_return (false);
2020 gimple_set_location (omp_return_stmt1, loc);
2021 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
5f40b3cb 2022
726a989a 2023 /* Extract data for GIMPLE_OMP_FOR. */
5f40b3cb 2024 gcc_assert (loop->header == single_dom_exit (loop)->src);
538dd0b7 2025 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
5f40b3cb 2026
726a989a 2027 cvar = gimple_cond_lhs (cond_stmt);
5f40b3cb
ZD
2028 cvar_base = SSA_NAME_VAR (cvar);
2029 phi = SSA_NAME_DEF_STMT (cvar);
2030 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
b731b390 2031 initvar = copy_ssa_name (cvar);
5f40b3cb
ZD
2032 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2033 initvar);
2034 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2035
1dff453d 2036 gsi = gsi_last_nondebug_bb (loop->latch);
726a989a
RB
2037 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2038 gsi_remove (&gsi, true);
5f40b3cb
ZD
2039
2040 /* Prepare cfg. */
2041 for_bb = split_edge (loop_preheader_edge (loop));
2042 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2043 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2044 gcc_assert (exit == single_dom_exit (loop));
2045
2046 guard = make_edge (for_bb, ex_bb, 0);
2047 single_succ_edge (loop->latch)->flags = 0;
2048 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
538dd0b7
DM
2049 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2050 !gsi_end_p (gpi); gsi_next (&gpi))
5f40b3cb 2051 {
f5045c96
AM
2052 source_location locus;
2053 tree def;
538dd0b7
DM
2054 gphi *phi = gpi.phi ();
2055 gphi *stmt;
2056
2057 stmt = as_a <gphi *> (
2058 SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));
f5045c96
AM
2059
2060 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
b8698a0f 2061 locus = gimple_phi_arg_location_from_edge (stmt,
f5045c96 2062 loop_preheader_edge (loop));
9e227d60 2063 add_phi_arg (phi, def, guard, locus);
f5045c96
AM
2064
2065 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2066 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
9e227d60 2067 add_phi_arg (phi, def, end, locus);
5f40b3cb
ZD
2068 }
2069 e = redirect_edge_and_branch (exit, nexit->dest);
2070 PENDING_STMT (e) = NULL;
2071
726a989a
RB
2072 /* Emit GIMPLE_OMP_FOR. */
2073 gimple_cond_set_lhs (cond_stmt, cvar_base);
5f40b3cb 2074 type = TREE_TYPE (cvar);
9ff70652 2075 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
5f40b3cb
ZD
2076 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2077
74bf76ed 2078 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
9ff70652 2079 gimple_set_location (for_stmt, loc);
726a989a
RB
2080 gimple_omp_for_set_index (for_stmt, 0, initvar);
2081 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2082 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2083 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2084 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2085 cvar_base,
2086 build_int_cst (type, 1)));
2087
2088 gsi = gsi_last_bb (for_bb);
2089 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
2090 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2091
726a989a
RB
2092 /* Emit GIMPLE_OMP_CONTINUE. */
2093 gsi = gsi_last_bb (loop->latch);
538dd0b7
DM
2094 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2095 gimple_set_location (omp_cont_stmt, loc);
2096 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2097 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
5f40b3cb 2098
726a989a
RB
2099 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2100 gsi = gsi_last_bb (ex_bb);
538dd0b7
DM
2101 omp_return_stmt2 = gimple_build_omp_return (true);
2102 gimple_set_location (omp_return_stmt2, loc);
2103 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
5f40b3cb 2104
cd7d9fd7
RG
2105 /* After the above dom info is hosed. Re-compute it. */
2106 free_dominance_info (CDI_DOMINATORS);
2107 calculate_dominance_info (CDI_DOMINATORS);
2108
5f40b3cb
ZD
2109 return paral_bb;
2110}
2111
08dab97a
RL
2112/* Generates code to execute the iterations of LOOP in N_THREADS
2113 threads in parallel.
2114
2115 NITER describes number of iterations of LOOP.
fa10beec 2116 REDUCTION_LIST describes the reductions existent in the LOOP. */
5f40b3cb
ZD
2117
2118static void
c203e8a7
TS
2119gen_parallel_loop (struct loop *loop,
2120 reduction_info_table_type *reduction_list,
a509ebb5 2121 unsigned n_threads, struct tree_niter_desc *niter)
5f40b3cb 2122{
5f40b3cb 2123 tree many_iterations_cond, type, nit;
726a989a
RB
2124 tree arg_struct, new_arg_struct;
2125 gimple_seq stmts;
9f9f72aa 2126 edge entry, exit;
a509ebb5 2127 struct clsn_data clsn_data;
5f40b3cb 2128 unsigned prob;
9ff70652
JJ
2129 location_t loc;
2130 gimple cond_stmt;
768da0da 2131 unsigned int m_p_thread=2;
5f40b3cb
ZD
2132
2133 /* From
2134
2135 ---------------------------------------------------------------------
2136 loop
2137 {
2138 IV = phi (INIT, IV + STEP)
2139 BODY1;
2140 if (COND)
2141 break;
2142 BODY2;
2143 }
2144 ---------------------------------------------------------------------
2145
2146 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2147 we generate the following code:
2148
2149 ---------------------------------------------------------------------
2150
2151 if (MAY_BE_ZERO
a509ebb5
RL
2152 || NITER < MIN_PER_THREAD * N_THREADS)
2153 goto original;
5f40b3cb
ZD
2154
2155 BODY1;
2156 store all local loop-invariant variables used in body of the loop to DATA.
726a989a 2157 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
5f40b3cb 2158 load the variables from DATA.
726a989a 2159 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
5f40b3cb
ZD
2160 BODY2;
2161 BODY1;
726a989a
RB
2162 GIMPLE_OMP_CONTINUE;
2163 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2164 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
5f40b3cb
ZD
2165 goto end;
2166
2167 original:
2168 loop
2169 {
2170 IV = phi (INIT, IV + STEP)
2171 BODY1;
2172 if (COND)
2173 break;
2174 BODY2;
2175 }
2176
2177 end:
2178
2179 */
2180
2181 /* Create two versions of the loop -- in the old one, we know that the
2182 number of iterations is large enough, and we will transform it into the
2183 loop that will be split to loop_fn, the new one will be used for the
2184 remaining iterations. */
a509ebb5 2185
768da0da
RL
2186 /* We should compute a better number-of-iterations value for outer loops.
2187 That is, if we have
2188
2189 for (i = 0; i < n; ++i)
2190 for (j = 0; j < m; ++j)
2191 ...
2192
2193 we should compute nit = n * m, not nit = n.
2194 Also may_be_zero handling would need to be adjusted. */
2195
5f40b3cb
ZD
2196 type = TREE_TYPE (niter->niter);
2197 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2198 NULL_TREE);
2199 if (stmts)
726a989a 2200 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb 2201
768da0da
RL
2202 if (loop->inner)
2203 m_p_thread=2;
2204 else
2205 m_p_thread=MIN_PER_THREAD;
2206
2207 many_iterations_cond =
2208 fold_build2 (GE_EXPR, boolean_type_node,
2209 nit, build_int_cst (type, m_p_thread * n_threads));
2210
5f40b3cb 2211 many_iterations_cond
a509ebb5
RL
2212 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2213 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2214 many_iterations_cond);
5f40b3cb 2215 many_iterations_cond
a509ebb5 2216 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
5f40b3cb 2217 if (stmts)
726a989a 2218 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
2219 if (!is_gimple_condexpr (many_iterations_cond))
2220 {
2221 many_iterations_cond
a509ebb5
RL
2222 = force_gimple_operand (many_iterations_cond, &stmts,
2223 true, NULL_TREE);
5f40b3cb 2224 if (stmts)
726a989a 2225 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
2226 }
2227
2228 initialize_original_copy_tables ();
2229
2230 /* We assume that the loop usually iterates a lot. */
2231 prob = 4 * REG_BR_PROB_BASE / 5;
0f900dfa
JJ
2232 loop_version (loop, many_iterations_cond, NULL,
2233 prob, prob, REG_BR_PROB_BASE - prob, true);
5f40b3cb
ZD
2234 update_ssa (TODO_update_ssa);
2235 free_original_copy_tables ();
2236
2237 /* Base all the induction variables in LOOP on a single control one. */
c80a5403 2238 canonicalize_loop_ivs (loop, &nit, true);
5f40b3cb 2239
7c82d827
TV
2240 /* Ensure that the exit condition is the first statement in the loop.
2241 The common case is that latch of the loop is empty (apart from the
2242 increment) and immediately follows the loop exit test. Attempt to move the
2243 entry of the loop directly before the exit check and increase the number of
2244 iterations of the loop by one. */
2245 if (!try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2246 {
2247 /* Fall back on the method that handles more cases, but duplicates the
2248 loop body: move the exit condition of LOOP to the beginning of its
2249 header, and duplicate the part of the last iteration that gets disabled
2250 to the exit of the loop. */
2251 transform_to_exit_first_loop (loop, reduction_list, nit);
2252 }
a509ebb5 2253
fa10beec 2254 /* Generate initializations for reductions. */
c203e8a7
TS
2255 if (reduction_list->elements () > 0)
2256 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
5f40b3cb
ZD
2257
2258 /* Eliminate the references to local variables from the loop. */
9f9f72aa
AP
2259 gcc_assert (single_exit (loop));
2260 entry = loop_preheader_edge (loop);
2261 exit = single_dom_exit (loop);
5f40b3cb 2262
9f9f72aa 2263 eliminate_local_variables (entry, exit);
5f40b3cb
ZD
2264 /* In the old loop, move all variables non-local to the loop to a structure
2265 and back, and create separate decls for the variables used in loop. */
b8698a0f 2266 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
9f9f72aa 2267 &new_arg_struct, &clsn_data);
5f40b3cb
ZD
2268
2269 /* Create the parallel constructs. */
9ff70652
JJ
2270 loc = UNKNOWN_LOCATION;
2271 cond_stmt = last_stmt (loop->header);
2272 if (cond_stmt)
2273 loc = gimple_location (cond_stmt);
18751894
TV
2274 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2275 new_arg_struct, n_threads, loc);
c203e8a7 2276 if (reduction_list->elements () > 0)
a509ebb5 2277 create_call_for_reduction (loop, reduction_list, &clsn_data);
5f40b3cb
ZD
2278
2279 scev_reset ();
2280
2281 /* Cancel the loop (it is simpler to do it here rather than to teach the
2282 expander to do it). */
2283 cancel_loop_tree (loop);
2284
92a6bdbd
SP
2285 /* Free loop bound estimations that could contain references to
2286 removed statements. */
f0bd40b1 2287 FOR_EACH_LOOP (loop, 0)
92a6bdbd 2288 free_numbers_of_iterations_estimates_loop (loop);
5f40b3cb
ZD
2289}
2290
9857228c
SP
2291/* Returns true when LOOP contains vector phi nodes. */
2292
2293static bool
726a989a 2294loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
2295{
2296 unsigned i;
2297 basic_block *bbs = get_loop_body_in_dom_order (loop);
538dd0b7 2298 gphi_iterator gsi;
9857228c 2299 bool res = true;
9857228c
SP
2300
2301 for (i = 0; i < loop->num_nodes; i++)
726a989a 2302 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
538dd0b7 2303 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
9857228c
SP
2304 goto end;
2305
2306 res = false;
2307 end:
2308 free (bbs);
2309 return res;
2310}
2311
08dab97a
RL
2312/* Create a reduction_info struct, initialize it with REDUC_STMT
2313 and PHI, insert it to the REDUCTION_LIST. */
2314
2315static void
c203e8a7 2316build_new_reduction (reduction_info_table_type *reduction_list,
538dd0b7 2317 gimple reduc_stmt, gphi *phi)
08dab97a 2318{
4a8fb1a1 2319 reduction_info **slot;
08dab97a
RL
2320 struct reduction_info *new_reduction;
2321
2322 gcc_assert (reduc_stmt);
b8698a0f 2323
08dab97a
RL
2324 if (dump_file && (dump_flags & TDF_DETAILS))
2325 {
2326 fprintf (dump_file,
2327 "Detected reduction. reduction stmt is: \n");
2328 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2329 fprintf (dump_file, "\n");
2330 }
b8698a0f 2331
08dab97a 2332 new_reduction = XCNEW (struct reduction_info);
b8698a0f 2333
08dab97a
RL
2334 new_reduction->reduc_stmt = reduc_stmt;
2335 new_reduction->reduc_phi = phi;
5d1fd1de 2336 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
08dab97a 2337 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
c203e8a7 2338 slot = reduction_list->find_slot (new_reduction, INSERT);
08dab97a
RL
2339 *slot = new_reduction;
2340}
2341
5d1fd1de
JJ
2342/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2343
4a8fb1a1
LC
2344int
2345set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 2346{
4a8fb1a1 2347 struct reduction_info *const red = *slot;
5d1fd1de
JJ
2348 gimple_set_uid (red->reduc_phi, red->reduc_version);
2349 return 1;
2350}
2351
08dab97a
RL
2352/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2353
2354static void
c203e8a7 2355gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
08dab97a 2356{
538dd0b7 2357 gphi_iterator gsi;
08dab97a
RL
2358 loop_vec_info simple_loop_info;
2359
08dab97a
RL
2360 simple_loop_info = vect_analyze_loop_form (loop);
2361
2362 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2363 {
538dd0b7 2364 gphi *phi = gsi.phi ();
08dab97a
RL
2365 affine_iv iv;
2366 tree res = PHI_RESULT (phi);
2367 bool double_reduc;
2368
ea057359 2369 if (virtual_operand_p (res))
08dab97a
RL
2370 continue;
2371
2372 if (!simple_iv (loop, loop, res, &iv, true)
2373 && simple_loop_info)
2374 {
8a9ecffd
MM
2375 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
2376 phi, true,
2377 &double_reduc);
48710229 2378 if (reduc_stmt && !double_reduc)
08dab97a
RL
2379 build_new_reduction (reduction_list, reduc_stmt, phi);
2380 }
2381 }
5d1fd1de
JJ
2382 destroy_loop_vec_info (simple_loop_info, true);
2383
2384 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2385 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2386 only now. */
c203e8a7 2387 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
08dab97a
RL
2388}
2389
2390/* Try to initialize NITER for code generation part. */
2391
2392static bool
2393try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2394{
2395 edge exit = single_dom_exit (loop);
2396
2397 gcc_assert (exit);
2398
2399 /* We need to know # of iterations, and there should be no uses of values
2400 defined inside loop outside of it, unless the values are invariants of
2401 the loop. */
2402 if (!number_of_iterations_exit (loop, exit, niter, false))
2403 {
2404 if (dump_file && (dump_flags & TDF_DETAILS))
2405 fprintf (dump_file, " FAILED: number of iterations not known\n");
2406 return false;
2407 }
2408
2409 return true;
2410}
2411
2412/* Try to initialize REDUCTION_LIST for code generation part.
2413 REDUCTION_LIST describes the reductions. */
2414
2415static bool
4a8fb1a1 2416try_create_reduction_list (loop_p loop,
c203e8a7 2417 reduction_info_table_type *reduction_list)
08dab97a
RL
2418{
2419 edge exit = single_dom_exit (loop);
538dd0b7 2420 gphi_iterator gsi;
08dab97a
RL
2421
2422 gcc_assert (exit);
2423
2424 gather_scalar_reductions (loop, reduction_list);
2425
b8698a0f 2426
08dab97a
RL
2427 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2428 {
538dd0b7 2429 gphi *phi = gsi.phi ();
08dab97a
RL
2430 struct reduction_info *red;
2431 imm_use_iterator imm_iter;
2432 use_operand_p use_p;
2433 gimple reduc_phi;
2434 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2435
ea057359 2436 if (!virtual_operand_p (val))
08dab97a
RL
2437 {
2438 if (dump_file && (dump_flags & TDF_DETAILS))
2439 {
2440 fprintf (dump_file, "phi is ");
2441 print_gimple_stmt (dump_file, phi, 0, 0);
2442 fprintf (dump_file, "arg of phi to exit: value ");
2443 print_generic_expr (dump_file, val, 0);
2444 fprintf (dump_file, " used outside loop\n");
2445 fprintf (dump_file,
2446 " checking if it a part of reduction pattern: \n");
2447 }
c203e8a7 2448 if (reduction_list->elements () == 0)
08dab97a
RL
2449 {
2450 if (dump_file && (dump_flags & TDF_DETAILS))
2451 fprintf (dump_file,
2452 " FAILED: it is not a part of reduction.\n");
2453 return false;
2454 }
2455 reduc_phi = NULL;
2456 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2457 {
4942af9b
JJ
2458 if (!gimple_debug_bind_p (USE_STMT (use_p))
2459 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
08dab97a
RL
2460 {
2461 reduc_phi = USE_STMT (use_p);
2462 break;
2463 }
2464 }
2465 red = reduction_phi (reduction_list, reduc_phi);
2466 if (red == NULL)
2467 {
2468 if (dump_file && (dump_flags & TDF_DETAILS))
2469 fprintf (dump_file,
2470 " FAILED: it is not a part of reduction.\n");
2471 return false;
2472 }
2473 if (dump_file && (dump_flags & TDF_DETAILS))
2474 {
2475 fprintf (dump_file, "reduction phi is ");
2476 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2477 fprintf (dump_file, "reduction stmt is ");
2478 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2479 }
2480 }
2481 }
2482
2483 /* The iterations of the loop may communicate only through bivs whose
2484 iteration space can be distributed efficiently. */
2485 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2486 {
538dd0b7 2487 gphi *phi = gsi.phi ();
08dab97a
RL
2488 tree def = PHI_RESULT (phi);
2489 affine_iv iv;
2490
ea057359 2491 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
08dab97a
RL
2492 {
2493 struct reduction_info *red;
2494
2495 red = reduction_phi (reduction_list, phi);
2496 if (red == NULL)
2497 {
2498 if (dump_file && (dump_flags & TDF_DETAILS))
2499 fprintf (dump_file,
2500 " FAILED: scalar dependency between iterations\n");
2501 return false;
2502 }
2503 }
2504 }
2505
2506
2507 return true;
2508}
2509
5f40b3cb
ZD
2510/* Detect parallel loops and generate parallel code using libgomp
2511 primitives. Returns true if some loop was parallelized, false
2512 otherwise. */
2513
09489eb8 2514static bool
5f40b3cb
ZD
2515parallelize_loops (void)
2516{
2517 unsigned n_threads = flag_tree_parallelize_loops;
2518 bool changed = false;
2519 struct loop *loop;
2520 struct tree_niter_desc niter_desc;
f873b205 2521 struct obstack parloop_obstack;
8adfe01d 2522 HOST_WIDE_INT estimated;
b05e0233 2523 source_location loop_loc;
f873b205 2524
5f40b3cb
ZD
2525 /* Do not parallelize loops in the functions created by parallelization. */
2526 if (parallelized_function_p (cfun->decl))
2527 return false;
8adfe01d
RL
2528 if (cfun->has_nonlocal_label)
2529 return false;
5f40b3cb 2530
f873b205 2531 gcc_obstack_init (&parloop_obstack);
c203e8a7 2532 reduction_info_table_type reduction_list (10);
726a989a 2533 init_stmt_vec_info_vec ();
a509ebb5 2534
f0bd40b1 2535 FOR_EACH_LOOP (loop, 0)
5f40b3cb 2536 {
4a8fb1a1 2537 reduction_list.empty ();
48710229
RL
2538 if (dump_file && (dump_flags & TDF_DETAILS))
2539 {
2540 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2541 if (loop->inner)
2542 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2543 else
2544 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2545 }
b8698a0f 2546
48710229 2547 /* If we use autopar in graphite pass, we use its marked dependency
87d4d0ee
SP
2548 checking results. */
2549 if (flag_loop_parallelize_all && !loop->can_be_parallel)
48710229
RL
2550 {
2551 if (dump_file && (dump_flags & TDF_DETAILS))
2552 fprintf (dump_file, "loop is not parallel according to graphite\n");
87d4d0ee 2553 continue;
48710229 2554 }
87d4d0ee 2555
48710229
RL
2556 if (!single_dom_exit (loop))
2557 {
b8698a0f 2558
48710229
RL
2559 if (dump_file && (dump_flags & TDF_DETAILS))
2560 fprintf (dump_file, "loop is !single_dom_exit\n");
b8698a0f 2561
08dab97a 2562 continue;
48710229 2563 }
08dab97a
RL
2564
2565 if (/* And of course, the loop must be parallelizable. */
2566 !can_duplicate_loop_p (loop)
1d4af1e8 2567 || loop_has_blocks_with_irreducible_flag (loop)
8adfe01d 2568 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
9857228c 2569 /* FIXME: the check for vector phi nodes could be removed. */
69958396 2570 || loop_has_vector_phi_nodes (loop))
08dab97a 2571 continue;
e5b332cd 2572
652c4c71 2573 estimated = estimated_stmt_executions_int (loop);
e5b332cd
RG
2574 if (estimated == -1)
2575 estimated = max_stmt_executions_int (loop);
87d4d0ee 2576 /* FIXME: Bypass this check as graphite doesn't update the
e5b332cd 2577 count and frequency correctly now. */
87d4d0ee 2578 if (!flag_loop_parallelize_all
e5b332cd
RG
2579 && ((estimated != -1
2580 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
87d4d0ee
SP
2581 /* Do not bother with loops in cold areas. */
2582 || optimize_loop_nest_for_size_p (loop)))
08dab97a 2583 continue;
b8698a0f 2584
08dab97a
RL
2585 if (!try_get_loop_niter (loop, &niter_desc))
2586 continue;
2587
c203e8a7 2588 if (!try_create_reduction_list (loop, &reduction_list))
08dab97a
RL
2589 continue;
2590
f873b205
LB
2591 if (!flag_loop_parallelize_all
2592 && !loop_parallel_p (loop, &parloop_obstack))
5f40b3cb
ZD
2593 continue;
2594
2595 changed = true;
48710229
RL
2596 if (dump_file && (dump_flags & TDF_DETAILS))
2597 {
48710229 2598 if (loop->inner)
8adfe01d 2599 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
48710229 2600 else
8adfe01d
RL
2601 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2602 loop_loc = find_loop_location (loop);
b05e0233 2603 if (loop_loc != UNKNOWN_LOCATION)
8adfe01d 2604 fprintf (dump_file, "\nloop at %s:%d: ",
b05e0233 2605 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
b8698a0f 2606 }
c203e8a7 2607 gen_parallel_loop (loop, &reduction_list,
08dab97a 2608 n_threads, &niter_desc);
5f40b3cb
ZD
2609 }
2610
726a989a 2611 free_stmt_vec_info_vec ();
f873b205 2612 obstack_free (&parloop_obstack, NULL);
6b8ed145
RG
2613
2614 /* Parallelization will cause new function calls to be inserted through
d086d311
RG
2615 which local variables will escape. Reset the points-to solution
2616 for ESCAPED. */
6b8ed145 2617 if (changed)
d086d311 2618 pt_solution_reset (&cfun->gimple_df->escaped);
6b8ed145 2619
5f40b3cb
ZD
2620 return changed;
2621}
2622
c1bf2a39
AM
2623/* Parallelization. */
2624
c1bf2a39
AM
2625namespace {
2626
2627const pass_data pass_data_parallelize_loops =
2628{
2629 GIMPLE_PASS, /* type */
2630 "parloops", /* name */
2631 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
2632 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2633 ( PROP_cfg | PROP_ssa ), /* properties_required */
2634 0, /* properties_provided */
2635 0, /* properties_destroyed */
2636 0, /* todo_flags_start */
3bea341f 2637 0, /* todo_flags_finish */
c1bf2a39
AM
2638};
2639
2640class pass_parallelize_loops : public gimple_opt_pass
2641{
2642public:
2643 pass_parallelize_loops (gcc::context *ctxt)
2644 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2645 {}
2646
2647 /* opt_pass methods: */
1a3d085c 2648 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
be55bfe6 2649 virtual unsigned int execute (function *);
c1bf2a39
AM
2650
2651}; // class pass_parallelize_loops
2652
be55bfe6
TS
2653unsigned
2654pass_parallelize_loops::execute (function *fun)
2655{
2656 if (number_of_loops (fun) <= 1)
2657 return 0;
2658
2659 if (parallelize_loops ())
18751894
TV
2660 {
2661 fun->curr_properties &= ~(PROP_gimple_eomp);
2662 return TODO_update_ssa;
2663 }
2664
be55bfe6
TS
2665 return 0;
2666}
2667
c1bf2a39
AM
2668} // anon namespace
2669
2670gimple_opt_pass *
2671make_pass_parallelize_loops (gcc::context *ctxt)
2672{
2673 return new pass_parallelize_loops (ctxt);
2674}