]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
tree-if-conv.c (if_convertible_gimple_assign_stmt_p): Use ifcvt_could_trap_p also...
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
40e23961 25#include "alias.h"
c7131fb2 26#include "backend.h"
40e23961 27#include "tree.h"
c7131fb2 28#include "gimple.h"
60393bbc 29#include "hard-reg-set.h"
c7131fb2
AM
30#include "ssa.h"
31#include "options.h"
32#include "fold-const.h"
2fb9a547 33#include "internal-fn.h"
45b0be94 34#include "gimplify.h"
5be5c238 35#include "gimple-iterator.h"
18f429e2 36#include "gimplify-me.h"
5be5c238 37#include "gimple-walk.h"
d8a2d370
DN
38#include "stor-layout.h"
39#include "tree-nested.h"
442b4905 40#include "tree-cfg.h"
e28030cf
AM
41#include "tree-ssa-loop-ivopts.h"
42#include "tree-ssa-loop-manip.h"
43#include "tree-ssa-loop-niter.h"
442b4905
AM
44#include "tree-ssa-loop.h"
45#include "tree-into-ssa.h"
5f40b3cb 46#include "cfgloop.h"
5f40b3cb 47#include "tree-data-ref.h"
1bd6497c 48#include "tree-scalar-evolution.h"
cf835838 49#include "gimple-pretty-print.h"
5f40b3cb 50#include "tree-pass.h"
5f40b3cb 51#include "langhooks.h"
a509ebb5 52#include "tree-vectorizer.h"
4a8fb1a1 53#include "tree-hasher.h"
c1bf2a39 54#include "tree-parloops.h"
0645c1a2 55#include "omp-low.h"
1fe37220 56#include "tree-nested.h"
a79b7ec5 57#include "cgraph.h"
7c82d827 58#include "tree-ssa.h"
5f40b3cb
ZD
59
60/* This pass tries to distribute iterations of loops into several threads.
61 The implementation is straightforward -- for each loop we test whether its
62 iterations are independent, and if it is the case (and some additional
63 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
64 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
65 machinery do its job.
b8698a0f 66
5f40b3cb
ZD
 67 Most of the complexity is in bringing the code into shape expected
68 by the omp expanders:
726a989a
RB
69 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
70 variable and that the exit test is at the start of the loop body
71 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
72 variables by accesses through pointers, and breaking up ssa chains
73 by storing the values incoming to the parallelized loop to a structure
74 passed to the new function as an argument (something similar is done
75 in omp gimplification, unfortunately only a small part of the code
76 can be shared).
77
78 TODO:
79 -- if there are several parallelizable loops in a function, it may be
80 possible to generate the threads just once (using synchronization to
81 ensure that cross-loop dependences are obeyed).
70837b71
RL
82 -- handling of common reduction patterns for outer loops.
83
84 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 85/*
a509ebb5 86 Reduction handling:
8a9ecffd 87 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 88 The code transformation will be introduced by an example.
b8698a0f
L
89
90
a509ebb5
RL
91parloop
92{
93 int sum=1;
94
0eb7e7aa 95 for (i = 0; i < N; i++)
a509ebb5
RL
96 {
97 x[i] = i + 3;
98 sum+=x[i];
99 }
100}
101
0eb7e7aa 102gimple-like code:
a509ebb5
RL
103header_bb:
104
0eb7e7aa
RL
105 # sum_29 = PHI <sum_11(5), 1(3)>
106 # i_28 = PHI <i_12(5), 0(3)>
107 D.1795_8 = i_28 + 3;
108 x[i_28] = D.1795_8;
109 sum_11 = D.1795_8 + sum_29;
110 i_12 = i_28 + 1;
111 if (N_6(D) > i_12)
112 goto header_bb;
113
a509ebb5
RL
114
115exit_bb:
116
0eb7e7aa
RL
117 # sum_21 = PHI <sum_11(4)>
118 printf (&"%d"[0], sum_21);
a509ebb5
RL
119
120
121after reduction transformation (only relevant parts):
122
123parloop
124{
125
126....
127
0eb7e7aa 128
fa10beec 129 # Storing the initial value given by the user. #
0eb7e7aa 130
ae0bce62 131 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
132
133 #pragma omp parallel num_threads(4)
a509ebb5 134
0eb7e7aa 135 #pragma omp for schedule(static)
ae0bce62
RL
136
137 # The neutral element corresponding to the particular
138 reduction's operation, e.g. 0 for PLUS_EXPR,
139 1 for MULT_EXPR, etc. replaces the user's initial value. #
140
141 # sum.27_29 = PHI <sum.27_11, 0>
142
0eb7e7aa 143 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 144
726a989a 145 GIMPLE_OMP_CONTINUE
a509ebb5 146
0eb7e7aa
RL
147 # Adding this reduction phi is done at create_phi_for_local_result() #
148 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 149 GIMPLE_OMP_RETURN
b8698a0f
L
150
151 # Creating the atomic operation is done at
0eb7e7aa 152 create_call_for_reduction_1() #
a509ebb5 153
0eb7e7aa
RL
154 #pragma omp atomic_load
155 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
156 D.1840_60 = sum.27_56 + D.1839_59;
157 #pragma omp atomic_store (D.1840_60);
b8698a0f 158
726a989a 159 GIMPLE_OMP_RETURN
b8698a0f 160
0eb7e7aa
RL
161 # collecting the result after the join of the threads is done at
162 create_loads_for_reductions().
ae0bce62
RL
163 The value computed by the threads is loaded from the
164 shared struct. #
165
b8698a0f 166
0eb7e7aa 167 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 168 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
169 sum_43 = D.1795_41 + sum_37;
170
171 exit bb:
172 # sum_21 = PHI <sum_43, sum_26>
173 printf (&"%d"[0], sum_21);
174
175...
176
a509ebb5
RL
177}
178
179*/
180
/* Minimal number of iterations of a loop that should be executed in each
   thread.  Loops with fewer iterations per thread are considered not
   profitable to parallelize, given thread creation/synchronization
   overhead.  */
#define MIN_PER_THREAD 100
/* Element of the hashtable, representing a
   reduction in the current loop.  */
struct reduction_info
{
  gimple reduc_stmt;		/* reduction statement.  */
  gimple reduc_phi;		/* The phi node defining the reduction.  */
  enum tree_code reduction_code;/* code for the reduction operation.  */
  unsigned reduc_version;	/* SSA_NAME_VERSION of original reduc_phi
				   result.  Used as the hash key (see
				   reduction_hasher::hash).  */
  gphi *keep_res;		/* The PHI_RESULT of this phi is the resulting value
				   of the reduction variable when exiting the loop.  */
  tree initial_value;		/* The initial value of the reduction var before entering the loop.  */
  tree field;			/* the name of the field in the parloop data structure intended for reduction.  */
  tree init;			/* reduction initialization value (the neutral
				   element of the reduction operation).  */
  gphi *new_phi;		/* (helper field) Newly created phi node whose result
				   will be passed to the atomic operation.  Represents
				   the local result each thread computed for the reduction
				   operation.  */
};
204
/* Reduction info hashtable helpers.  Entries are malloc'ed
   "struct reduction_info *"; free_ptr_hash releases them when the table
   is destroyed.  */

struct reduction_hasher : free_ptr_hash <reduction_info>
{
  static inline hashval_t hash (const reduction_info *);
  static inline bool equal (const reduction_info *, const reduction_info *);
};
212
213/* Equality and hash functions for hashtab code. */
a509ebb5 214
4a8fb1a1 215inline bool
67f58944 216reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
4a8fb1a1 217{
a509ebb5
RL
218 return (a->reduc_phi == b->reduc_phi);
219}
220
4a8fb1a1 221inline hashval_t
67f58944 222reduction_hasher::hash (const reduction_info *a)
a509ebb5 223{
5d1fd1de 224 return a->reduc_version;
a509ebb5
RL
225}
226
c203e8a7 227typedef hash_table<reduction_hasher> reduction_info_table_type;
4a8fb1a1
LC
228
229
/* Return the reduction_info entry of REDUCTION_LIST describing the
   reduction defined by PHI, or NULL if PHI is not a known reduction phi
   (or is NULL, or the table is empty).  The lookup key is gimple_uid (PHI),
   which holds the SSA_NAME_VERSION of the original reduction phi result
   (matching reduction_hasher::hash).  */

static struct reduction_info *
reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
{
  struct reduction_info tmpred, *red;

  if (reduction_list->elements () == 0 || phi == NULL)
    return NULL;

  /* Build a temporary key object on the stack for the lookup.  */
  tmpred.reduc_phi = phi;
  tmpred.reduc_version = gimple_uid (phi);
  red = reduction_list->find (&tmpred);

  return red;
}
244
/* Element of hashtable of SSA names to copy.  */

struct name_to_copy_elt
{
  unsigned version;	/* The version (SSA_NAME_VERSION) of the name to copy.  */
  tree new_name;	/* The new name used in the copy.  */
  tree field;		/* The field of the structure used to pass the
			   value.  */
};
254
/* Name copies hashtable helpers.  Entries are malloc'ed
   "struct name_to_copy_elt *"; free_ptr_hash releases them when the
   table is destroyed.  */

struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
{
  static inline hashval_t hash (const name_to_copy_elt *);
  static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
};
262
263/* Equality and hash functions for hashtab code. */
5f40b3cb 264
4a8fb1a1 265inline bool
67f58944 266name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
4a8fb1a1 267{
5f40b3cb
ZD
268 return a->version == b->version;
269}
270
4a8fb1a1 271inline hashval_t
67f58944 272name_to_copy_hasher::hash (const name_to_copy_elt *a)
5f40b3cb 273{
5f40b3cb
ZD
274 return (hashval_t) a->version;
275}
276
c203e8a7 277typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
4a8fb1a1 278
/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
   matrix.  Rather than use floats, we simply keep a single DENOMINATOR that
   represents the denominator for every element in the matrix.  */
typedef struct lambda_trans_matrix_s
{
  lambda_matrix matrix;		/* The integer coefficient matrix.  */
  int rowsize;			/* Number of rows.  */
  int colsize;			/* Number of columns.  */
  int denominator;		/* Shared denominator for all elements.  */
} *lambda_trans_matrix;

/* Accessors for the fields of a lambda_trans_matrix.  */
#define LTM_MATRIX(T) ((T)->matrix)
#define LTM_ROWSIZE(T) ((T)->rowsize)
#define LTM_COLSIZE(T) ((T)->colsize)
#define LTM_DENOMINATOR(T) ((T)->denominator)
294/* Allocate a new transformation matrix. */
295
296static lambda_trans_matrix
297lambda_trans_matrix_new (int colsize, int rowsize,
298 struct obstack * lambda_obstack)
299{
300 lambda_trans_matrix ret;
301
302 ret = (lambda_trans_matrix)
303 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
304 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
305 LTM_ROWSIZE (ret) = rowsize;
306 LTM_COLSIZE (ret) = colsize;
307 LTM_DENOMINATOR (ret) = 1;
308 return ret;
309}
310
311/* Multiply a vector VEC by a matrix MAT.
312 MAT is an M*N matrix, and VEC is a vector with length N. The result
313 is stored in DEST which must be a vector of length M. */
314
315static void
316lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
317 lambda_vector vec, lambda_vector dest)
318{
319 int i, j;
320
321 lambda_vector_clear (dest, m);
322 for (i = 0; i < m; i++)
323 for (j = 0; j < n; j++)
324 dest[i] += matrix[i][j] * vec[j];
325}
326
/* Return true if TRANS is a legal transformation matrix that respects
   the dependence vectors in DISTS and DIRS.  The conservative answer
   is false.

   "Wolfe proves that a unimodular transformation represented by the
   matrix T is legal when applied to a loop nest with a set of
   lexicographically non-negative distance vectors RDG if and only if
   for each vector d in RDG, (T.d >= 0) is lexicographically positive.
   i.e.: if and only if it transforms the lexicographically positive
   distance vectors to lexicographically positive vectors.  Note that
   a unimodular matrix must transform the zero vector (and only it) to
   the zero vector." S.Muchnick.  */

static bool
lambda_transform_legal_p (lambda_trans_matrix trans,
			  int nb_loops,
			  vec<ddr_p> dependence_relations)
{
  unsigned int i, j;
  lambda_vector distres;
  struct data_dependence_relation *ddr;

  /* TRANS must be square, matching the depth of the loop nest.  */
  gcc_assert (LTM_COLSIZE (trans) == nb_loops
	      && LTM_ROWSIZE (trans) == nb_loops);

  /* When there are no dependences, the transformation is correct.  */
  if (dependence_relations.length () == 0)
    return true;

  ddr = dependence_relations[0];
  if (ddr == NULL)
    return true;

  /* When there is an unknown relation in the dependence_relations, we
     know that it is not worth looking at this loop nest: give up.  */
  if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
    return false;

  distres = lambda_vector_new (nb_loops);

  /* For each distance vector in the dependence graph.  */
  FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
    {
      /* Don't care about relations for which we know that there is no
	 dependence, nor about read-read (aka. output-dependences):
	 these data accesses can happen in any order.  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_known
	  || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
	continue;

      /* Conservatively answer: "this transformation is not valid".  */
      if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
	return false;

      /* If the dependence could not be captured by a distance vector,
	 conservatively answer that the transform is not valid.  */
      if (DDR_NUM_DIST_VECTS (ddr) == 0)
	return false;

      /* Compute trans.dist_vect for every distance vector of DDR; each
	 transformed vector must stay lexicographically positive for the
	 dependence to be preserved.  */
      for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
	{
	  lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
				     DDR_DIST_VECT (ddr, j), distres);

	  if (!lambda_vector_lexico_pos (distres, nb_loops))
	    return false;
	}
    }
  return true;
}
/* Data dependency analysis.  Returns true if the iterations of LOOP
   are independent of each other (that is, if we can execute them
   in parallel).  Scratch memory is allocated from PARLOOP_OBSTACK.  */

static bool
loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
{
  vec<ddr_p> dependence_relations;
  vec<data_reference_p> datarefs;
  lambda_trans_matrix trans;
  bool ret = false;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Considering loop %d\n", loop->num);
      if (!loop->inner)
	fprintf (dump_file, "loop is innermost\n");
      else
	fprintf (dump_file, "loop NOT innermost\n");
    }

  /* Check for problems with dependences.  If the loop can be reversed,
     the iterations are independent.  */
  auto_vec<loop_p, 3> loop_nest;
  datarefs.create (10);
  dependence_relations.create (100);
  if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
					   &dependence_relations))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  FAILED: cannot analyze data dependencies\n");
      ret = false;
      goto end;
    }
  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_data_dependence_relations (dump_file, dependence_relations);

  /* The 1x1 matrix [-1] represents reversing the (single) loop; if that
     transform is legal, iterations may run in any order.  */
  trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
  LTM_MATRIX (trans)[0][0] = -1;

  if (lambda_transform_legal_p (trans, 1, dependence_relations))
    {
      ret = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "  SUCCESS: may be parallelized\n");
    }
  else if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file,
	     "  FAILED: data dependencies exist across iterations\n");

 end:
  /* Release dependence info on all paths, including analysis failure.  */
  free_dependence_relations (dependence_relations);
  free_data_refs (datarefs);

  return ret;
}
455
1d4af1e8
SP
456/* Return true when LOOP contains basic blocks marked with the
457 BB_IRREDUCIBLE_LOOP flag. */
458
459static inline bool
460loop_has_blocks_with_irreducible_flag (struct loop *loop)
461{
462 unsigned i;
463 basic_block *bbs = get_loop_body_in_dom_order (loop);
464 bool res = true;
465
466 for (i = 0; i < loop->num_nodes; i++)
467 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
468 goto end;
469
470 res = false;
471 end:
472 free (bbs);
473 return res;
474}
475
/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
   The assignment statement is placed on edge ENTRY.  DECL_ADDRESS maps decls
   to their addresses that can be reused.  The address of OBJ is known to
   be invariant in the whole function.  Other needed statements are placed
   right before GSI.  When GSI is NULL (processing a debug bind value,
   where no new statements may be created), NULL_TREE is returned if the
   canonical address is not already recorded in DECL_ADDRESS.  */

static tree
take_address_of (tree obj, tree type, edge entry,
		 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
{
  int uid;
  tree *var_p, name, addr;
  gassign *stmt;
  gimple_seq stmts;

  /* Since the address of OBJ is invariant, the trees may be shared.
     Avoid rewriting unrelated parts of the code.  */
  obj = unshare_expr (obj);
  /* Strip handled components (COMPONENT_REF, ARRAY_REF, ...) to find the
     base of the reference.  */
  for (var_p = &obj;
       handled_component_p (*var_p);
       var_p = &TREE_OPERAND (*var_p, 0))
    continue;

  /* Canonicalize the access to base on a MEM_REF.  */
  if (DECL_P (*var_p))
    *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));

  /* Assign a canonical SSA name to the address of the base decl used
     in the address and share it for all accesses and addresses based
     on it.  */
  uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
  int_tree_map elt;
  elt.uid = uid;
  int_tree_map *slot = decl_address->find_slot (elt, INSERT);
  if (!slot->to)
    {
      /* No canonical address yet.  If we may not insert statements,
	 report failure to the caller.  */
      if (gsi == NULL)
	return NULL;
      addr = TREE_OPERAND (*var_p, 0);
      const char *obj_name
	= get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
      if (obj_name)
	name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
      else
	name = make_ssa_name (TREE_TYPE (addr));
      stmt = gimple_build_assign (name, addr);
      gsi_insert_on_edge_immediate (entry, stmt);

      slot->uid = uid;
      slot->to = name;
    }
  else
    name = slot->to;

  /* Express the address in terms of the canonical SSA name.  */
  TREE_OPERAND (*var_p, 0) = name;
  if (gsi == NULL)
    return build_fold_addr_expr_with_type (obj, type);

  name = force_gimple_operand (build_addr (obj, current_function_decl),
			       &stmts, true, NULL_TREE);
  if (!gimple_seq_empty_p (stmts))
    gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);

  /* Convert to the requested pointer TYPE if it differs.  */
  if (!useless_type_conversion_p (type, TREE_TYPE (name)))
    {
      name = force_gimple_operand (fold_convert (type, name), &stmts, true,
				   NULL_TREE);
      if (!gimple_seq_empty_p (stmts))
	gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
    }

  return name;
}
550
/* Callback for htab_traverse.  Create the initialization statement
   for reduction described in SLOT, and place it at the preheader of
   the loop described in DATA.  Always returns 1 so traversal
   continues.  */

int
initialize_reductions (reduction_info **slot, struct loop *loop)
{
  tree init, c;
  tree bvar, type, arg;
  edge e;

  struct reduction_info *const reduc = *slot;

  /* Create initialization in preheader:
     reduction_variable = initialization value of reduction.  */

  /* In the phi node at the header, replace the argument coming
     from the preheader with the reduction initialization value.  */

  /* Create a new variable to initialize the reduction.  */
  type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  bvar = create_tmp_var (type, "reduction");

  /* Build an OMP_CLAUSE_REDUCTION only to query omp_reduction_init for
     the neutral element of this reduction's operation.  */
  c = build_omp_clause (gimple_location (reduc->reduc_stmt),
			OMP_CLAUSE_REDUCTION);
  OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
  OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));

  init = omp_reduction_init (c, TREE_TYPE (bvar));
  reduc->init = init;

  /* Replace the argument representing the initialization value
     with the initialization value for the reduction (neutral
     element for the particular operation, e.g. 0 for PLUS_EXPR,
     1 for MULT_EXPR, etc).
     Keep the old value in a new variable "reduction_initial",
     that will be taken in consideration after the parallel
     computing is done.  */

  e = loop_preheader_edge (loop);
  arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
  /* Create new variable to hold the initial value.  */

  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
	   (reduc->reduc_phi, loop_preheader_edge (loop)), init);
  /* Remember the user's original initial value; it is folded back into
     the result after the threads join.  */
  reduc->initial_value = arg;
  return 1;
}
5f40b3cb
ZD
599
/* Data passed to eliminate_local_variables_1 through walk_tree /
   walk_gimple_op.  */

struct elv_data
{
  struct walk_stmt_info info;	/* Must be first: walk_gimple_op hands the
				   callback a pointer to this member.  */
  edge entry;			/* Entry edge of the SESE region.  */
  int_tree_htab_type *decl_address;  /* Decl UID -> canonical address map.  */
  gimple_stmt_iterator *gsi;	/* Insertion point for new statements, or
				   NULL when rewriting a debug bind value.  */
  bool changed;			/* Set when the expression was rewritten.  */
  bool reset;			/* Set when a debug bind value could not be
				   rewritten and must be reset.  */
};
609
9f9f72aa
AP
610/* Eliminates references to local variables in *TP out of the single
611 entry single exit region starting at DTA->ENTRY.
612 DECL_ADDRESS contains addresses of the references that had their
613 address taken already. If the expression is changed, CHANGED is
614 set to true. Callback for walk_tree. */
a509ebb5 615
5f40b3cb 616static tree
8a171a59 617eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 618{
3d9a9f94 619 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 620 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
621
622 if (DECL_P (t))
623 {
624 *walk_subtrees = 0;
625
626 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
627 return NULL_TREE;
628
629 type = TREE_TYPE (t);
630 addr_type = build_pointer_type (type);
cba1eb61
JJ
631 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
632 dta->gsi);
633 if (dta->gsi == NULL && addr == NULL_TREE)
634 {
635 dta->reset = true;
636 return NULL_TREE;
637 }
638
70f34814 639 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
640
641 dta->changed = true;
642 return NULL_TREE;
643 }
644
645 if (TREE_CODE (t) == ADDR_EXPR)
646 {
8a171a59
ZD
647 /* ADDR_EXPR may appear in two contexts:
648 -- as a gimple operand, when the address taken is a function invariant
649 -- as gimple rhs, when the resulting address in not a function
650 invariant
651 We do not need to do anything special in the latter case (the base of
652 the memory reference whose address is taken may be replaced in the
653 DECL_P case). The former case is more complicated, as we need to
654 ensure that the new address is still a gimple operand. Thus, it
655 is not sufficient to replace just the base of the memory reference --
656 we need to move the whole computation of the address out of the
657 loop. */
658 if (!is_gimple_val (t))
5f40b3cb
ZD
659 return NULL_TREE;
660
661 *walk_subtrees = 0;
8a171a59
ZD
662 obj = TREE_OPERAND (t, 0);
663 var = get_base_address (obj);
664 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
665 return NULL_TREE;
666
667 addr_type = TREE_TYPE (t);
cba1eb61
JJ
668 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
669 dta->gsi);
670 if (dta->gsi == NULL && addr == NULL_TREE)
671 {
672 dta->reset = true;
673 return NULL_TREE;
674 }
5f40b3cb
ZD
675 *tp = addr;
676
677 dta->changed = true;
678 return NULL_TREE;
679 }
680
726a989a 681 if (!EXPR_P (t))
5f40b3cb
ZD
682 *walk_subtrees = 0;
683
684 return NULL_TREE;
685}
686
/* Moves the references to local variables in STMT at *GSI out of the single
   entry single exit region starting at ENTRY.  DECL_ADDRESS contains
   addresses of the references that had their address taken
   already.  */

static void
eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
				int_tree_htab_type *decl_address)
{
  struct elv_data dta;
  gimple stmt = gsi_stmt (*gsi);

  memset (&dta.info, '\0', sizeof (dta.info));
  dta.entry = entry;
  dta.decl_address = decl_address;
  dta.changed = false;
  dta.reset = false;

  if (gimple_debug_bind_p (stmt))
    {
      /* No new statements may be inserted for debug binds: walk with
	 GSI == NULL; if rewriting fails, reset the bind value.  */
      dta.gsi = NULL;
      walk_tree (gimple_debug_bind_get_value_ptr (stmt),
		 eliminate_local_variables_1, &dta.info, NULL);
      if (dta.reset)
	{
	  gimple_debug_bind_reset_value (stmt);
	  dta.changed = true;
	}
    }
  else if (gimple_clobber_p (stmt))
    {
      /* Clobbers of local variables are meaningless in the outlined
	 function; replace them with a nop.  */
      stmt = gimple_build_nop ();
      gsi_replace (gsi, stmt, false);
      dta.changed = true;
    }
  else
    {
      dta.gsi = gsi;
      walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
    }

  if (dta.changed)
    update_stmt (stmt);
}
731
/* Eliminates the references to local variables from the single entry
   single exit region between the ENTRY and EXIT edges.

   This includes:
   1) Taking address of a local variable -- these are moved out of the
   region (and temporary variable is created to hold the address if
   necessary).

   2) Dereferencing a local variable -- these are replaced with indirect
   references.  */

static void
eliminate_local_variables (edge entry, edge exit)
{
  basic_block bb;
  auto_vec<basic_block, 3> body;
  unsigned i;
  gimple_stmt_iterator gsi;
  bool has_debug_stmt = false;
  int_tree_htab_type decl_address (10);
  basic_block entry_bb = entry->src;
  basic_block exit_bb = exit->dest;

  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: rewrite all non-debug statements, populating
     DECL_ADDRESS.  Debug binds are only noted here; they are processed
     in a second pass so they can reuse recorded addresses without
     inserting new statements.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    if (bb != entry_bb && bb != exit_bb)
      for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	if (is_gimple_debug (gsi_stmt (gsi)))
	  {
	    if (gimple_debug_bind_p (gsi_stmt (gsi)))
	      has_debug_stmt = true;
	  }
	else
	  eliminate_local_variables_stmt (entry, &gsi, &decl_address);

  /* Second pass: rewrite the debug binds seen above.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	  if (gimple_debug_bind_p (gsi_stmt (gsi)))
	    eliminate_local_variables_stmt (entry, &gsi, &decl_address);
}
775
776/* Returns true if expression EXPR is not defined between ENTRY and
777 EXIT, i.e. if all its operands are defined outside of the region. */
778
779static bool
780expr_invariant_in_region_p (edge entry, edge exit, tree expr)
781{
782 basic_block entry_bb = entry->src;
783 basic_block exit_bb = exit->dest;
784 basic_block def_bb;
9f9f72aa
AP
785
786 if (is_gimple_min_invariant (expr))
787 return true;
788
789 if (TREE_CODE (expr) == SSA_NAME)
790 {
726a989a 791 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
792 if (def_bb
793 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
794 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
795 return false;
796
797 return true;
798 }
799
726a989a 800 return false;
5f40b3cb
ZD
801}
802
/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
   The copies are stored to NAME_COPIES, if NAME was already duplicated,
   its duplicate stored in NAME_COPIES is returned.

   Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
   duplicated, storing the copies in DECL_COPIES.  */

static tree
separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies,
			       bool copy_name_p)
{
  tree copy, var, var_copy;
  unsigned idx, uid, nuid;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt, *nelt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (TREE_CODE (name) != SSA_NAME)
    return name;

  idx = SSA_NAME_VERSION (name);
  elt.version = idx;
  slot = name_copies->find_slot_with_hash (&elt, idx,
					   copy_name_p ? INSERT : NO_INSERT);
  /* Already duplicated: reuse the recorded copy.  */
  if (slot && *slot)
    return (*slot)->new_name;

  if (copy_name_p)
    {
      copy = duplicate_ssa_name (name, NULL);
      nelt = XNEW (struct name_to_copy_elt);
      nelt->version = idx;
      nelt->new_name = copy;
      nelt->field = NULL_TREE;
      *slot = nelt;
    }
  else
    {
      /* NO_INSERT lookup yields NULL slot here (a hit returned above);
	 keep using NAME itself, only its base decl is replaced below.  */
      gcc_assert (!slot);
      copy = name;
    }

  var = SSA_NAME_VAR (name);
  /* Anonymous SSA names have no base decl to duplicate.  */
  if (!var)
    return copy;

  uid = DECL_UID (var);
  ielt.uid = uid;
  dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
  if (!dslot->to)
    {
      var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
      DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
      dslot->uid = uid;
      dslot->to = var_copy;

      /* Ensure that when we meet this decl next time, we won't duplicate
	 it again: map the copy's own UID to itself.  */
      nuid = DECL_UID (var_copy);
      ielt.uid = nuid;
      dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
      gcc_assert (!dslot->to);
      dslot->uid = nuid;
      dslot->to = var_copy;
    }
  else
    var_copy = dslot->to;

  /* Rebase the (possibly copied) SSA name on the duplicated decl.  */
  replace_ssa_name_symbol (copy, var_copy);
  return copy;
}
876
/* Finds the ssa names used in STMT that are defined outside the
   region between ENTRY and EXIT and replaces such ssa names with
   their duplicates.  The duplicates are stored to NAME_COPIES.  Base
   decls of all ssa names used in STMT (including those defined in
   LOOP) are replaced with the new temporary variables; the
   replacement decls are stored in DECL_COPIES.  */

static void
separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
			       name_to_copy_table_type *name_copies,
			       int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  def_operand_p def;
  ssa_op_iter oi;
  tree name, copy;
  bool copy_name_p;

  /* Definitions made inside the region keep their SSA name; only the
     base decl is replaced (copy_name_p == false).  */
  FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
    {
      name = DEF_FROM_PTR (def);
      gcc_assert (TREE_CODE (name) == SSA_NAME);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    false);
      gcc_assert (copy == name);
    }

  /* Uses of names defined outside the region are replaced with their
     duplicates; uses of region-local names only get the new base decl.  */
  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      copy_name_p = expr_invariant_in_region_p (entry, exit, name);
      copy = separate_decls_in_region_name (name, name_copies, decl_copies,
					    copy_name_p);
      SET_USE (use, copy);
    }
}
916
b5b8b0ac
AO
/* Debug-statement counterpart of separate_decls_in_region_stmt.
   Remaps the variable bound by the debug bind/source bind STMT to its
   duplicate from DECL_COPIES, and replaces the ssa names used in STMT
   with their duplicates from NAME_COPIES.  Returns true if STMT
   cannot be remapped and should be removed by the caller, false if it
   was updated in place.  */

static bool
separate_decls_in_region_debug (gimple stmt,
				name_to_copy_table_type *name_copies,
				int_tree_htab_type *decl_copies)
{
  use_operand_p use;
  ssa_op_iter oi;
  tree var, name;
  struct int_tree_map ielt;
  struct name_to_copy_elt elt;
  name_to_copy_elt **slot;
  int_tree_map *dslot;

  if (gimple_debug_bind_p (stmt))
    var = gimple_debug_bind_get_var (stmt);
  else if (gimple_debug_source_bind_p (stmt))
    var = gimple_debug_source_bind_get_var (stmt);
  else
    return true;
  /* Debug expr and label decls are not duplicated; drop such binds.  */
  if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
    return true;
  gcc_assert (DECL_P (var) && SSA_VAR_P (var));
  ielt.uid = DECL_UID (var);
  /* NO_INSERT: decls were already collected by the non-debug pass; a
     miss means no replacement exists and the bind must go.  */
  dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
  if (!dslot)
    return true;
  if (gimple_debug_bind_p (stmt))
    gimple_debug_bind_set_var (stmt, dslot->to);
  else if (gimple_debug_source_bind_p (stmt))
    gimple_debug_source_bind_set_var (stmt, dslot->to);

  FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
    {
      name = USE_FROM_PTR (use);
      if (TREE_CODE (name) != SSA_NAME)
	continue;

      elt.version = SSA_NAME_VERSION (name);
      slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
      if (!slot)
	{
	  /* No duplicate exists for this name; reset the bound value
	     rather than referring to an out-of-region name.  */
	  gimple_debug_bind_reset_value (stmt);
	  update_stmt (stmt);
	  break;
	}

      SET_USE (use, (*slot)->new_name);
    }

  return false;
}
975
0eb7e7aa
RL
976/* Callback for htab_traverse. Adds a field corresponding to the reduction
977 specified in SLOT. The type is passed in DATA. */
978
4a8fb1a1
LC
979int
980add_field_for_reduction (reduction_info **slot, tree type)
a509ebb5 981{
b8698a0f 982
4a8fb1a1 983 struct reduction_info *const red = *slot;
aa06a978
RB
984 tree var = gimple_assign_lhs (red->reduc_stmt);
985 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
986 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
0eb7e7aa
RL
987
988 insert_field_into_struct (type, field);
989
990 red->field = field;
991
992 return 1;
993}
a509ebb5 994
5f40b3cb 995/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 996 described in SLOT. The type is passed in DATA. */
5f40b3cb 997
4a8fb1a1
LC
998int
999add_field_for_name (name_to_copy_elt **slot, tree type)
5f40b3cb 1000{
4a8fb1a1 1001 struct name_to_copy_elt *const elt = *slot;
5f40b3cb 1002 tree name = ssa_name (elt->version);
70b5e7dc
RG
1003 tree field = build_decl (UNKNOWN_LOCATION,
1004 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1005 TREE_TYPE (name));
5f40b3cb
ZD
1006
1007 insert_field_into_struct (type, field);
1008 elt->field = field;
a509ebb5 1009
5f40b3cb
ZD
1010 return 1;
1011}
1012
b8698a0f
L
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single
   thread, or the initial value in case no iteration was executed.
   This function creates a phi node reflecting these values.
   The phi's result will be stored in NEW_PHI field of the
   reduction's data structure.  */

int
create_phi_for_local_result (reduction_info **slot, struct loop *loop)
{
  struct reduction_info *const reduc = *slot;
  edge e;
  gphi *new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  /* The phi result reuses the name/type of the reduction's lhs.  */
  local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
  locus = gimple_location (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  /* Initial value on the no-iterations edge ...  */
  add_phi_arg (new_phi, reduc->init, e, locus);
  /* ... and the thread-local partial result on the loop edge.  */
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus);
  reduc->new_phi = new_phi;

  return 1;
}
5f40b3cb
ZD
1054
/* Describes the shared ".paral_data" communication area and the basic
   blocks where it is written before the parallel region and read
   inside it.  */

struct clsn_data
{
  tree store;		/* Structure variable stored to before the loop.  */
  tree load;		/* Pointer through which the data is re-loaded.  */

  basic_block store_bb;	/* Block where the stores are emitted.  */
  basic_block load_bb;	/* Block where the loads are emitted.  */
};
1063
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  */

int
create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const reduc = *slot;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of this reduction's field in the shared structure; the
     atomic update operates on that memory location.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  gsi = gsi_last_bb (bb);
  e = split_block (bb, gsi_stmt (gsi));
  new_bb = e->dest;

  /* Emit the GIMPLE_OMP_ATOMIC_LOAD of the shared field into tmp_load.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
  tmp_load = make_ssa_name (tmp_load);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  /* Split again so the atomic load sits alone in its block, then
     combine the loaded value with this thread's partial result.  */
  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* Store the combined value back atomically.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
1116
b8698a0f
L
/* Create the atomic operation at the join point of the threads.
   REDUCTION_LIST describes the reductions in the LOOP.
   LD_ST_DATA describes the shared data structure where
   shared data is stored in and loaded from.  */
static void
create_call_for_reduction (struct loop *loop,
			   reduction_info_table_type *reduction_list,
			   struct clsn_data *ld_st_data)
{
  /* First merge each thread's partial result (or the initial value)
     into a phi node at the loop exit.  */
  reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
  /* Find the fallthru edge from GIMPLE_OMP_CONTINUE.  */
  ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
  /* Then emit the atomic read-modify-write of the shared field for
     each reduction.  */
  reduction_list
    ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
}
1132
ae0bce62
RL
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place.  */

int
create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  /* Assign the accumulated value from the shared structure to the
     name originally produced by the KEEP_RES exit phi.  */
  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The phi is now superseded by the load; remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  /* KEEP_RES must be present in its block's phi list.  */
  gcc_unreachable ();
}
1167
/* Load the reduction result that was stored in LD_ST_DATA.
   REDUCTION_LIST describes the list of reductions that the
   loads should be generated for.  */
static void
create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
				  struct clsn_data *ld_st_data)
{
  gimple_stmt_iterator gsi;
  tree t;
  gimple stmt;

  /* Re-establish the load pointer: LD_ST_DATA->LOAD = &STORE, so the
     per-reduction loads below can go through it.  */
  gsi = gsi_after_labels (ld_st_data->load_bb);
  t = build_fold_addr_expr (ld_st_data->store);
  stmt = gimple_build_assign (ld_st_data->load, t);

  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);

  reduction_list
    ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);

}
1189
0eb7e7aa
RL
/* Callback for htab_traverse.  Store the neutral value for the
   particular reduction's operation, e.g. 0 for PLUS_EXPR,
   1 for MULT_EXPR, etc. into the reduction field.
   The reduction is specified in SLOT.  The store information is
   passed in DATA.  */

int
create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
{
  struct reduction_info *const red = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));

  /* Emit STORE.field = initial_value at the end of the store block,
     i.e. before the threads are spawned.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
  stmt = gimple_build_assign (t, red->initial_value);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
1212
a509ebb5
RL
/* Callback for htab_traverse.  Creates loads to a field of LOAD in LOAD_BB and
   store to a field of STORE in STORE_BB for the ssa name and its duplicate
   specified in SLOT.  */

int
create_loads_and_stores_for_name (name_to_copy_elt **slot,
				  struct clsn_data *clsn_data)
{
  struct name_to_copy_elt *const elt = *slot;
  tree t;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (elt->new_name);
  tree load_struct;

  /* STORE.field = original_name, emitted before the parallel region.  */
  gsi = gsi_last_bb (clsn_data->store_bb);
  t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
  stmt = gimple_build_assign (t, ssa_name (elt->version));
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* new_name = LOAD->field, emitted inside the region so the spawned
     function reads its inputs through the shared structure.  */
  gsi = gsi_last_bb (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
  stmt = gimple_build_assign (elt->new_name, t);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  return 1;
}
1241
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit,
			  reduction_info_table_type *reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  name_to_copy_table_type name_copies (10);
  int_tree_htab_type decl_copies (10);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  auto_vec<basic_block, 3> body;
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: collect names/decls from all non-debug statements of
     the region.  */
  FOR_EACH_VEC_ELT (body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   &name_copies, &decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       &name_copies, &decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, &name_copies,
						      &decl_copies))
		    {
		      /* Unmappable debug stmt; drop it.  */
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  if (name_copies.elements () == 0 && reduction_list->elements () == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      name_copies.traverse <tree, add_field_for_name> (type);
      if (reduction_list && reduction_list->elements () > 0)
	{
	  /* Create the fields for reductions.  */
	  reduction_list->traverse <tree, add_field_for_reduction> (type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      *new_arg_struct = make_ssa_name (nvar);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      name_copies
	.traverse <struct clsn_data *, create_loads_and_stores_for_name>
	(ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && reduction_list->elements () > 0)
	{
	  reduction_list
	    ->traverse <struct clsn_data *, create_stores_for_reduction>
	    (ld_st_data);
	  /* A separate load pointer is used after the join point.  */
	  clsn_data.load = make_ssa_name (nvar);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }
}
1401
a79b7ec5 1402/* Returns true if FN was created to run in parallel. */
5f40b3cb 1403
62e0a1ed 1404bool
a79b7ec5 1405parallelized_function_p (tree fndecl)
5f40b3cb 1406{
a79b7ec5
TV
1407 cgraph_node *node = cgraph_node::get (fndecl);
1408 gcc_assert (node != NULL);
1409 return node->parallelized_function;
5f40b3cb
ZD
1410}
1411
/* Creates and returns an empty function that will receive the body of
   a parallelized loop.  */

static tree
create_loop_fn (location_t loc)
{
  char buf[100];
  char *tname;
  tree decl, type, name, t;
  struct function *act_cfun = cfun;
  /* Counter making each generated function name unique.  */
  static unsigned loopfn_num;

  loc = LOCATION_LOCUS (loc);
  snprintf (buf, 100, "%s.$loopfn", current_function_name ());
  ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
  clean_symbol_name (tname);
  name = get_identifier (tname);
  /* The outlined function takes a single pointer argument (the shared
     data structure) and returns nothing.  */
  type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);

  decl = build_decl (loc, FUNCTION_DECL, name, type);
  TREE_STATIC (decl) = 1;
  TREE_USED (decl) = 1;
  DECL_ARTIFICIAL (decl) = 1;
  DECL_IGNORED_P (decl) = 0;
  TREE_PUBLIC (decl) = 0;
  DECL_UNINLINABLE (decl) = 1;
  DECL_EXTERNAL (decl) = 0;
  DECL_CONTEXT (decl) = NULL_TREE;
  DECL_INITIAL (decl) = make_node (BLOCK);

  /* Void result decl.  */
  t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_IGNORED_P (t) = 1;
  DECL_RESULT (decl) = t;

  /* The single ".paral_data_param" parameter.  */
  t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
		  ptr_type_node);
  DECL_ARTIFICIAL (t) = 1;
  DECL_ARG_TYPE (t) = ptr_type_node;
  DECL_CONTEXT (t) = decl;
  TREE_USED (t) = 1;
  DECL_ARGUMENTS (decl) = t;

  allocate_struct_function (decl, false);

  /* The call to allocate_struct_function clobbers CFUN, so we need to restore
     it.  */
  set_cfun (act_cfun);

  return decl;
}
1463
7c82d827
TV
1464/* Replace uses of NAME by VAL in block BB. */
1465
1466static void
1467replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1468{
1469 gimple use_stmt;
1470 imm_use_iterator imm_iter;
1471
1472 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1473 {
1474 if (gimple_bb (use_stmt) != bb)
1475 continue;
1476
1477 use_operand_p use_p;
1478 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1479 SET_USE (use_p, val);
1480 }
1481}
1482
7c82d827
TV
/* Do transformation from:

     <bb preheader>:
     ...
     goto <bb header>

     <bb header>:
     ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_a = PHI <sum_init (preheader), sum_b (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     if (ivtmp_a < n)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb header>

     <bb exit>:
     sum_z = PHI <sum_b (cond[1]), ...>

     [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
	 that's <bb header>.

   to:

     <bb preheader>:
     ...
     goto <bb newheader>

     <bb header>:
     ivtmp_a = PHI <ivtmp_c (latch)>
     sum_a = PHI <sum_c (latch)>
     ...
     use (ivtmp_a)
     ...
     sum_b = sum_a + sum_update
     ...
     goto <bb latch>;

     <bb newheader>:
     ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
     sum_c = PHI <sum_init (preheader), sum_b (latch)>
     if (ivtmp_c < n + 1)
       goto <bb header>;
     else
       goto <bb newexit>;

     <bb latch>:
     ivtmp_b = ivtmp_a + 1;
     goto <bb newheader>

     <bb newexit>:
     sum_y = PHI <sum_c (newheader)>

     <bb exit>:
     sum_z = PHI <sum_y (newexit), ...>

   That is: the loop condition is moved in front of the (unchanged) body,
   becoming a new header block with its own phis, the bound becomes n + 1,
   and a new empty exit block is inserted so that loop-closed ssa phis of
   the old exit block are fed through fresh phis (sum_y) in it.

   Note: the example does not show any virtual phis, but these are handled
   more or less as reductions.


   Moves the exit condition of LOOP to the beginning of its header.
   REDUCTION_LIST describes the reductions in LOOP.  BOUND is the new loop
   bound.  */

static void
transform_to_exit_first_loop_alt (struct loop *loop,
				  reduction_info_table_type *reduction_list,
				  tree bound)
{
  basic_block header = loop->header;
  basic_block latch = loop->latch;
  edge exit = single_dom_exit (loop);
  basic_block exit_block = exit->dest;
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  edge e;

  /* Rewriting virtuals into loop-closed ssa normal form makes this
     transformation simpler.  It also ensures that the virtuals are in
     loop-closed ssa normal from after the transformation, which is required by
     create_parallel_loop.  */
  rewrite_virtuals_into_loop_closed_ssa (loop);

  /* Create the new_header block.  */
  basic_block new_header = split_block_before_cond_jump (exit->src);
  edge edge_at_split = single_pred_edge (new_header);

  /* Redirect entry edge to new_header.  */
  edge entry = loop_preheader_edge (loop);
  e = redirect_edge_and_branch (entry, new_header);
  gcc_assert (e == entry);

  /* Redirect post_inc_edge to new_header.  */
  edge post_inc_edge = single_succ_edge (latch);
  e = redirect_edge_and_branch (post_inc_edge, new_header);
  gcc_assert (e == post_inc_edge);

  /* Redirect post_cond_edge to header.  */
  edge post_cond_edge = single_pred_edge (latch);
  e = redirect_edge_and_branch (post_cond_edge, header);
  gcc_assert (e == post_cond_edge);

  /* Redirect edge_at_split to latch.  */
  e = redirect_edge_and_branch (edge_at_split, latch);
  gcc_assert (e == edge_at_split);

  /* Set the new loop bound.  */
  gimple_cond_set_rhs (cond_stmt, bound);
  update_stmt (cond_stmt);

  /* Repair the ssa.  The header phis and the pending phi args of the
     redirected latch edge are walked in lockstep; both were created
     from the same original header phis, so they pair up.  */
  vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
  edge_var_map *vm;
  gphi_iterator gsi;
  int i;
  for (gsi = gsi_start_phis (header), i = 0;
       !gsi_end_p (gsi) && v->iterate (i, &vm);
       gsi_next (&gsi), i++)
    {
      gphi *phi = gsi.phi ();
      tree res_a = PHI_RESULT (phi);

      /* Create new phi.  */
      tree res_c = copy_ssa_name (res_a, phi);
      gphi *nphi = create_phi_node (res_c, new_header);

      /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'.  */
      replace_uses_in_bb_by (res_a, res_c, new_header);

      /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi.  */
      add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);

      /* Replace sum_b with sum_c in exit phi.  */
      tree res_b = redirect_edge_var_map_def (vm);
      replace_uses_in_bb_by (res_b, res_c, exit_block);

      struct reduction_info *red = reduction_phi (reduction_list, phi);
      /* Only virtuals, the control iv and reduction phis may reach
	 the loop exit, so only they occur here.  */
      gcc_assert (virtual_operand_p (res_a)
		  || res_a == control
		  || red != NULL);

      if (red)
	{
	  /* Register the new reduction phi.  */
	  red->reduc_phi = nphi;
	  gimple_set_uid (red->reduc_phi, red->reduc_version);
	}
    }
  gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));

  /* Set the preheader argument of the new phis to ivtmp/sum_init.  */
  flush_pending_stmts (entry);

  /* Set the latch arguments of the new phis to ivtmp/sum_b.  */
  flush_pending_stmts (post_inc_edge);

  /* Create a new empty exit block, in between the new loop header and the old
     exit block.  The function separate_decls_in_region needs this block to
     insert code that is active on loop exit, but not any other path.  */
  basic_block new_exit_block = split_edge (exit);

  /* Insert and register the reduction exit phis.  */
  for (gphi_iterator gsi = gsi_start_phis (exit_block);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree res_z = PHI_RESULT (phi);

      /* Now that we have a new exit block, duplicate the phi of the old exit
	 block in the new exit block to preserve loop-closed ssa.  */
      edge succ_new_exit_block = single_succ_edge (new_exit_block);
      edge pred_new_exit_block = single_pred_edge (new_exit_block);
      tree res_y = copy_ssa_name (res_z, phi);
      gphi *nphi = create_phi_node (res_y, new_exit_block);
      tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
      add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
      add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);

      if (virtual_operand_p (res_z))
	continue;

      gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
      struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
      if (red != NULL)
	red->keep_res = nphi;
    }

  /* We're going to cancel the loop at the end of gen_parallel_loop, but until
     then we're still using some fields, so only bother about fields that are
     still used: header and latch.
     The loop has a new header bb, so we update it.  The latch bb stays the
     same.  */
  loop->header = new_header;

  /* Recalculate dominance info.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);
}
1731
/* Tries to move the exit condition of LOOP to the beginning of its header
   without duplication of the loop body.  NIT is the number of iterations of
   the loop.  REDUCTION_LIST describes the reductions in LOOP.  Return true
   if transformation is successful.  */

static bool
try_transform_to_exit_first_loop_alt (struct loop *loop,
				      reduction_info_table_type *reduction_list,
				      tree nit)
{
  /* Check whether the latch contains a single statement.  */
  if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
    return false;

  /* Check whether the latch contains the loop iv increment.  */
  edge back = single_succ_edge (loop->latch);
  edge exit = single_dom_exit (loop);
  gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  tree control = gimple_cond_lhs (cond_stmt);
  gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
  tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
  if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
    return false;

  /* Check whether there's no code between the loop condition and the latch.  */
  if (!single_pred_p (loop->latch)
      || single_pred (loop->latch) != exit->src)
    return false;

  tree alt_bound = NULL_TREE;
  tree nit_type = TREE_TYPE (nit);

  /* Figure out whether nit + 1 overflows.  */
  if (TREE_CODE (nit) == INTEGER_CST)
    {
      if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
	{
	  /* nit + 1 folds to a constant; use it as the new bound.  */
	  alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
				       nit, build_one_cst (nit_type));

	  gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
	  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
	  return true;
	}
      else
	{
	  /* Todo: Figure out if we can trigger this, if it's worth to handle
	     optimally, and if we can handle it optimally.  */
	  return false;
	}
    }

  gcc_assert (TREE_CODE (nit) == SSA_NAME);

  /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
     iv with base 0 and step 1 that is incremented in the latch, like this:

     <bb header>:
     # iv_1 = PHI <0 (preheader), iv_2 (latch)>
     ...
     if (iv_1 < nit)
       goto <bb latch>;
     else
       goto <bb exit>;

     <bb latch>:
     iv_2 = iv_1 + 1;
     goto <bb header>;

     The range of iv_1 is [0, nit].  The latch edge is taken for
     iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit.  So the
     number of latch executions is equal to nit.

     The function max_loop_iterations gives us the maximum number of latch
     executions, so it gives us the maximum value of nit.  */
  widest_int nit_max;
  if (!max_loop_iterations (loop, &nit_max))
    return false;

  /* Check if nit + 1 overflows.  */
  widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
  if (!wi::lts_p (nit_max, type_max))
    return false;

  gimple def = SSA_NAME_DEF_STMT (nit);

  /* Try to find nit + 1, in the form of n in an assignment nit = n - 1.  */
  if (def
      && is_gimple_assign (def)
      && gimple_assign_rhs_code (def) == PLUS_EXPR)
    {
      tree op1 = gimple_assign_rhs1 (def);
      tree op2 = gimple_assign_rhs2 (def);
      if (integer_minus_onep (op1))
	alt_bound = op2;
      else if (integer_minus_onep (op2))
	alt_bound = op1;
    }

  /* Without an existing name for nit + 1 we'd have to emit new code;
     give up instead.  */
  if (alt_bound == NULL_TREE)
    return false;

  transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
  return true;
}
1837
/* Moves the exit condition of LOOP to the beginning of its header.  NIT is the
   number of iterations of the loop.  REDUCTION_LIST describes the reductions in
   LOOP.

   This is the fallback exit-first transform: the part of the last iteration
   that executes after the exit test is duplicated past the loop (via
   gimple_duplicate_sese_tail), so afterwards the exit test is the first thing
   the loop body does.  */

static void
transform_to_exit_first_loop (struct loop *loop,
			      reduction_info_table_type *reduction_list,
			      tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gphi *phi, *nphi;
  gassign *stmt;
  gcond *cond_stmt, *cond_nit;
  tree nit_1;

  /* Split the header after its labels so the phi nodes stay in the (new,
     empty) header block and the old body starts in ORIG_HEADER.  */
  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The exit condition is expected to compare the control IV against NIT.  */
  cond_stmt = as_a <gcond *> (last_stmt (exit->src));
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gphi_iterator gsi = gsi_start_phis (loop->header);
       !gsi_end_p (gsi);
       gsi_next (&gsi))
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh result T and let the original name RES
	 be defined by a new phi in ORIG_HEADER fed by T.  */
      t = copy_ssa_name (res, phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);

      if (res == control)
	{
	  /* Keep the exit condition testing the header phi's new result.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* N = number of body blocks dominating the exit test.  */
  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  /* Duplicate the single-entry single-exit tail (everything up to the exit
     test, excluding the header itself) past the loop; NBBS receives the
     copies.  */
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gphi_iterator gsi = gsi_start_phis (ex_bb);
       !gsi_end_p (gsi); )
    {
      phi = gsi.phi ();
      res = PHI_RESULT (phi);
      if (virtual_operand_p (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (reduction_list->elements () > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything that is neither virtual nor a reduction must be the
	 control variable; there can be only one.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
  cond_nit = as_a <gcond *> (last_stmt (exit->src));
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				  fold_convert (TREE_TYPE (control_name), nit_1),
				  false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
}
1949
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.

   LOC is the source location used for all emitted OMP statements.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gomp_parallel *omp_par_stmt;
  gimple omp_return_stmt1, omp_return_stmt2;
  gimple phi;
  gcond *cond_stmt;
  gomp_for *for_stmt;
  gomp_continue *omp_cont_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  It is placed in the block
     preceding the loop preheader, with a num_threads(N_THREADS) clause.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (omp_par_stmt, loc);

  gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA: inside the parallel region, NEW_DATA is set to the
     address of DATA passed through LOOP_FN's argument.  */
  if (data)
    {
      gassign *assign_stmt;

      gsi = gsi_after_labels (bb);

      param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
      assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);

      assign_stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  omp_return_stmt1 = gimple_build_omp_return (false);
  gimple_set_location (omp_return_stmt1, loc);
  gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  The loop is exit-first at this point,
     so the exit condition lives in the header and its lhs is the control
     induction variable.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = as_a <gcond *> (last_stmt (loop->header));

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  initvar = copy_ssa_name (cvar);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* Remove the IV increment in the latch; GIMPLE_OMP_CONTINUE (emitted
     below) takes over advancing the control variable.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  /* GUARD lets a thread with no iterations skip the loop entirely; END is
     the new loop-exit edge from the latch.  */
  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gphi_iterator gpi = gsi_start_phis (ex_bb);
       !gsi_end_p (gpi); gsi_next (&gpi))
    {
      source_location locus;
      tree def;
      gphi *phi = gpi.phi ();
      gphi *stmt;

      stmt = as_a <gphi *> (
	       SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));

      /* On the GUARD edge the value flowing out is the pre-loop one; on the
	 END edge it is the value from the latch.  */
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR with a static schedule, iterating CVAR_BASE from
     CVAR_INIT while the original exit condition holds, step 1.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (omp_cont_stmt, loc);
  gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  omp_return_stmt2 = gimple_build_omp_return (true);
  gimple_set_location (omp_return_stmt2, loc);
  gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
2096
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop,
		   reduction_info_table_type *reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
     || NITER < MIN_PER_THREAD * N_THREADS)
     goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

    for (i = 0; i < n; ++i)
      for (j = 0; j < m; ++j)
        ...

    we should compute nit = n * m, not nit = n.
    Also may_be_zero handling would need to be adjusted.  */

  /* Gimplify the iteration-count expression into NIT, emitting any needed
     statements on the preheader edge.  */
  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* For outer loops use a lower per-thread iteration threshold (the inner
     loop multiplies the real work per iteration).  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

   many_iterations_cond =
     fold_build2 (GE_EXPR, boolean_type_node,
		nit, build_int_cst (type, m_p_thread * n_threads));

  /* Only parallelize when the count is known non-zero and large enough.  */
  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.
     The common case is that latch of the loop is empty (apart from the
     increment) and immediately follows the loop exit test.  Attempt to move the
     entry of the loop directly before the exit check and increase the number of
     iterations of the loop by one.  */
  if (!try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
    {
      /* Fall back on the method that handles more cases, but duplicates the
	 loop body: move the exit condition of LOOP to the beginning of its
	 header, and duplicate the part of the last iteration that gets disabled
	 to the exit of the loop.  */
      transform_to_exit_first_loop (loop, reduction_list, nit);
    }

  /* Generate initializations for reductions.  */
  if (reduction_list->elements () > 0)
    reduction_list->traverse <struct loop *, initialize_reductions> (loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
			new_arg_struct, n_threads, loc);
  if (reduction_list->elements () > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);
}
2275
9857228c
SP
2276/* Returns true when LOOP contains vector phi nodes. */
2277
2278static bool
726a989a 2279loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
2280{
2281 unsigned i;
2282 basic_block *bbs = get_loop_body_in_dom_order (loop);
538dd0b7 2283 gphi_iterator gsi;
9857228c 2284 bool res = true;
9857228c
SP
2285
2286 for (i = 0; i < loop->num_nodes; i++)
726a989a 2287 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
538dd0b7 2288 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
9857228c
SP
2289 goto end;
2290
2291 res = false;
2292 end:
2293 free (bbs);
2294 return res;
2295}
2296
08dab97a
RL
2297/* Create a reduction_info struct, initialize it with REDUC_STMT
2298 and PHI, insert it to the REDUCTION_LIST. */
2299
2300static void
c203e8a7 2301build_new_reduction (reduction_info_table_type *reduction_list,
538dd0b7 2302 gimple reduc_stmt, gphi *phi)
08dab97a 2303{
4a8fb1a1 2304 reduction_info **slot;
08dab97a
RL
2305 struct reduction_info *new_reduction;
2306
2307 gcc_assert (reduc_stmt);
b8698a0f 2308
08dab97a
RL
2309 if (dump_file && (dump_flags & TDF_DETAILS))
2310 {
2311 fprintf (dump_file,
2312 "Detected reduction. reduction stmt is: \n");
2313 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2314 fprintf (dump_file, "\n");
2315 }
b8698a0f 2316
08dab97a 2317 new_reduction = XCNEW (struct reduction_info);
b8698a0f 2318
08dab97a
RL
2319 new_reduction->reduc_stmt = reduc_stmt;
2320 new_reduction->reduc_phi = phi;
5d1fd1de 2321 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
08dab97a 2322 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
c203e8a7 2323 slot = reduction_list->find_slot (new_reduction, INSERT);
08dab97a
RL
2324 *slot = new_reduction;
2325}
2326
5d1fd1de
JJ
2327/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2328
4a8fb1a1
LC
2329int
2330set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 2331{
4a8fb1a1 2332 struct reduction_info *const red = *slot;
5d1fd1de
JJ
2333 gimple_set_uid (red->reduc_phi, red->reduc_version);
2334 return 1;
2335}
2336
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.

   Uses the vectorizer's loop analysis: header phis that are not simple IVs
   are fed to vect_force_simple_reduction; single (non-double) reductions are
   recorded.  */

static void
gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
{
  gphi_iterator gsi;
  loop_vec_info simple_loop_info;

  /* NOTE(review): simple_loop_info may be NULL when the loop does not have
     the form the vectorizer expects; the code below guards its uses, and
     destroy_loop_vec_info is presumably NULL-tolerant -- confirm.  */
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      if (virtual_operand_p (res))
	continue;

      /* A header phi that is not a simple induction variable is a reduction
	 candidate.  */
      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	   gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
							    phi, true,
							    &double_reduc);
	   if (reduc_stmt && !double_reduc)
	     build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
}
2374
2375/* Try to initialize NITER for code generation part. */
2376
2377static bool
2378try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2379{
2380 edge exit = single_dom_exit (loop);
2381
2382 gcc_assert (exit);
2383
2384 /* We need to know # of iterations, and there should be no uses of values
2385 defined inside loop outside of it, unless the values are invariants of
2386 the loop. */
2387 if (!number_of_iterations_exit (loop, exit, niter, false))
2388 {
2389 if (dump_file && (dump_flags & TDF_DETAILS))
2390 fprintf (dump_file, " FAILED: number of iterations not known\n");
2391 return false;
2392 }
2393
2394 return true;
2395}
2396
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.

   Returns false when a value live across the loop exit, or a scalar
   dependency between iterations, is not explained by a recognized
   reduction.  */

static bool
try_create_reduction_list (loop_p loop,
			   reduction_info_table_type *reduction_list)
{
  edge exit = single_dom_exit (loop);
  gphi_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);


  /* Every non-virtual phi at the exit block must correspond to a
     reduction; otherwise some value escapes the loop in a way we cannot
     privatize.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (!virtual_operand_p (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit:   value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       "  checking if it a part of reduction pattern: \n");
	    }
	  if (reduction_list->elements () == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the in-loop (non-debug) use of VAL; that should be the
	     reduction's phi node.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is  ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is  ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gphi *phi = gsi.phi ();
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  /* A header phi that is neither a simple IV nor a recognized
	     reduction is a loop-carried scalar dependency we cannot
	     parallelize.  */
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }


  return true;
}
2494
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.

   The requested thread count comes from -ftree-parallelize-loops
   (flag_tree_parallelize_loops).  Each loop passes through a series of
   filters (graphite verdict, single exit, duplicability, profitability,
   known iteration count, reduction analysis, dependence test) before
   gen_parallel_loop rewrites it.  */

static bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  source_location loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  reduction_info_table_type reduction_list (10);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (loop, 0)
    {
      /* The reduction table is reused across candidate loops.  */
      reduction_list.empty ();
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	if (loop->inner)
	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	else
	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
      }

      /* If we use autopar in graphite pass, we use its marked dependency
      checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
	 fprintf (dump_file, "loop is not parallel according to graphite\n");
	continue;
      }

      if (!single_dom_exit (loop))
      {

        if (dump_file && (dump_flags & TDF_DETAILS))
	 fprintf (dump_file, "loop is !single_dom_exit\n");

	continue;
      }

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* Profitability: skip loops that likely run too few iterations to
	 amortize thread startup, and loops in cold regions.  */
      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, &reduction_list))
	continue;

      /* Run the dependence analysis unless graphite already proved the
	 loop parallel.  */
      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
	if (loop->inner)
	  fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	else
	  fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
	loop_loc = find_loop_location (loop);
	if (loop_loc != UNKNOWN_LOCATION)
	  fprintf (dump_file, "\nloop at %s:%d: ",
		   LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
      }
      gen_parallel_loop (loop, &reduction_list,
			 n_threads, &niter_desc);
    }

  free_stmt_vec_info_vec ();
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
2607
c1bf2a39
AM
2608/* Parallelization. */
2609
c1bf2a39
AM
2610namespace {
2611
/* Pass descriptor: a GIMPLE pass named "parloops", grouped with the loop
   optimizations and timed under TV_TREE_PARALLELIZE_LOOPS; it requires CFG
   and SSA form.  */

const pass_data pass_data_parallelize_loops =
{
  GIMPLE_PASS, /* type */
  "parloops", /* name */
  OPTGROUP_LOOP, /* optinfo_flags */
  TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
  ( PROP_cfg | PROP_ssa ), /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};
2624
/* Pass wrapper for the auto-parallelization machinery.  Gated on
   -ftree-parallelize-loops requesting more than one thread.  */

class pass_parallelize_loops : public gimple_opt_pass
{
public:
  pass_parallelize_loops (gcc::context *ctxt)
    : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
  virtual unsigned int execute (function *);

}; // class pass_parallelize_loops
2637
be55bfe6
TS
2638unsigned
2639pass_parallelize_loops::execute (function *fun)
2640{
2641 if (number_of_loops (fun) <= 1)
2642 return 0;
2643
2644 if (parallelize_loops ())
18751894
TV
2645 {
2646 fun->curr_properties &= ~(PROP_gimple_eomp);
2647 return TODO_update_ssa;
2648 }
2649
be55bfe6
TS
2650 return 0;
2651}
2652
c1bf2a39
AM
2653} // anon namespace
2654
2655gimple_opt_pass *
2656make_pass_parallelize_loops (gcc::context *ctxt)
2657{
2658 return new pass_parallelize_loops (ctxt);
2659}