]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
attr_thumb.c: Skip if Thumb is not supported.
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
5624e564 2 Copyright (C) 2006-2015 Free Software Foundation, Inc.
70837b71
RL
3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
6da7fc87 10Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
6da7fc87
NC
19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
40e23961 25#include "alias.h"
c7131fb2 26#include "backend.h"
40e23961 27#include "tree.h"
c7131fb2 28#include "gimple.h"
60393bbc 29#include "hard-reg-set.h"
c7131fb2
AM
30#include "ssa.h"
31#include "options.h"
32#include "fold-const.h"
2fb9a547 33#include "internal-fn.h"
45b0be94 34#include "gimplify.h"
5be5c238 35#include "gimple-iterator.h"
18f429e2 36#include "gimplify-me.h"
5be5c238 37#include "gimple-walk.h"
d8a2d370
DN
38#include "stor-layout.h"
39#include "tree-nested.h"
442b4905 40#include "tree-cfg.h"
e28030cf
AM
41#include "tree-ssa-loop-ivopts.h"
42#include "tree-ssa-loop-manip.h"
43#include "tree-ssa-loop-niter.h"
442b4905
AM
44#include "tree-ssa-loop.h"
45#include "tree-into-ssa.h"
5f40b3cb 46#include "cfgloop.h"
5f40b3cb 47#include "tree-data-ref.h"
1bd6497c 48#include "tree-scalar-evolution.h"
cf835838 49#include "gimple-pretty-print.h"
5f40b3cb 50#include "tree-pass.h"
5f40b3cb 51#include "langhooks.h"
a509ebb5 52#include "tree-vectorizer.h"
4a8fb1a1 53#include "tree-hasher.h"
c1bf2a39 54#include "tree-parloops.h"
0645c1a2 55#include "omp-low.h"
1fe37220 56#include "tree-nested.h"
a79b7ec5 57#include "cgraph.h"
7c82d827 58#include "tree-ssa.h"
5f40b3cb
ZD
59
60/* This pass tries to distribute iterations of loops into several threads.
61 The implementation is straightforward -- for each loop we test whether its
62 iterations are independent, and if it is the case (and some additional
63 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
64 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
65 machinery do its job.
b8698a0f 66
5f40b3cb
ZD
67 The most of the complexity is in bringing the code into shape expected
68 by the omp expanders:
726a989a
RB
69 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
70 variable and that the exit test is at the start of the loop body
71 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
72 variables by accesses through pointers, and breaking up ssa chains
73 by storing the values incoming to the parallelized loop to a structure
74 passed to the new function as an argument (something similar is done
75 in omp gimplification, unfortunately only a small part of the code
76 can be shared).
77
78 TODO:
79 -- if there are several parallelizable loops in a function, it may be
80 possible to generate the threads just once (using synchronization to
81 ensure that cross-loop dependences are obeyed).
70837b71
RL
82 -- handling of common reduction patterns for outer loops.
83
84 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 85/*
a509ebb5 86 Reduction handling:
8a9ecffd 87 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 88 The code transformation will be introduced by an example.
b8698a0f
L
89
90
a509ebb5
RL
91parloop
92{
93 int sum=1;
94
0eb7e7aa 95 for (i = 0; i < N; i++)
a509ebb5
RL
96 {
97 x[i] = i + 3;
98 sum+=x[i];
99 }
100}
101
0eb7e7aa 102gimple-like code:
a509ebb5
RL
103header_bb:
104
0eb7e7aa
RL
105 # sum_29 = PHI <sum_11(5), 1(3)>
106 # i_28 = PHI <i_12(5), 0(3)>
107 D.1795_8 = i_28 + 3;
108 x[i_28] = D.1795_8;
109 sum_11 = D.1795_8 + sum_29;
110 i_12 = i_28 + 1;
111 if (N_6(D) > i_12)
112 goto header_bb;
113
a509ebb5
RL
114
115exit_bb:
116
0eb7e7aa
RL
117 # sum_21 = PHI <sum_11(4)>
118 printf (&"%d"[0], sum_21);
a509ebb5
RL
119
120
121after reduction transformation (only relevant parts):
122
123parloop
124{
125
126....
127
0eb7e7aa 128
fa10beec 129 # Storing the initial value given by the user. #
0eb7e7aa 130
ae0bce62 131 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
132
133 #pragma omp parallel num_threads(4)
a509ebb5 134
0eb7e7aa 135 #pragma omp for schedule(static)
ae0bce62
RL
136
137 # The neutral element corresponding to the particular
138 reduction's operation, e.g. 0 for PLUS_EXPR,
139 1 for MULT_EXPR, etc. replaces the user's initial value. #
140
141 # sum.27_29 = PHI <sum.27_11, 0>
142
0eb7e7aa 143 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 144
726a989a 145 GIMPLE_OMP_CONTINUE
a509ebb5 146
0eb7e7aa
RL
147 # Adding this reduction phi is done at create_phi_for_local_result() #
148 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 149 GIMPLE_OMP_RETURN
b8698a0f
L
150
151 # Creating the atomic operation is done at
0eb7e7aa 152 create_call_for_reduction_1() #
a509ebb5 153
0eb7e7aa
RL
154 #pragma omp atomic_load
155 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
156 D.1840_60 = sum.27_56 + D.1839_59;
157 #pragma omp atomic_store (D.1840_60);
b8698a0f 158
726a989a 159 GIMPLE_OMP_RETURN
b8698a0f 160
0eb7e7aa
RL
161 # collecting the result after the join of the threads is done at
162 create_loads_for_reductions().
ae0bce62
RL
163 The value computed by the threads is loaded from the
164 shared struct. #
165
b8698a0f 166
0eb7e7aa 167 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 168 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
169 sum_43 = D.1795_41 + sum_37;
170
171 exit bb:
172 # sum_21 = PHI <sum_43, sum_26>
173 printf (&"%d"[0], sum_21);
174
175...
176
a509ebb5
RL
177}
178
179*/
180
5f40b3cb
ZD
181/* Minimal number of iterations of a loop that should be executed in each
182 thread. */
183#define MIN_PER_THREAD 100
184
b8698a0f 185/* Element of the hashtable, representing a
a509ebb5
RL
186 reduction in the current loop. */
187struct reduction_info
188{
726a989a
RB
189 gimple reduc_stmt; /* reduction statement. */
190 gimple reduc_phi; /* The phi node defining the reduction. */
191 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
192 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
193 result. */
538dd0b7 194 gphi *keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 195 of the reduction variable when existing the loop. */
ae0bce62 196 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 197 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5 198 tree init; /* reduction initialization value. */
538dd0b7 199 gphi *new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
200 will be passed to the atomic operation. Represents
201 the local result each thread computed for the reduction
202 operation. */
203};
204
4a8fb1a1 205/* Reduction info hashtable helpers. */
a509ebb5 206
95fbe13e 207struct reduction_hasher : free_ptr_hash <reduction_info>
a509ebb5 208{
67f58944
TS
209 static inline hashval_t hash (const reduction_info *);
210 static inline bool equal (const reduction_info *, const reduction_info *);
4a8fb1a1
LC
211};
212
213/* Equality and hash functions for hashtab code. */
a509ebb5 214
4a8fb1a1 215inline bool
67f58944 216reduction_hasher::equal (const reduction_info *a, const reduction_info *b)
4a8fb1a1 217{
a509ebb5
RL
218 return (a->reduc_phi == b->reduc_phi);
219}
220
4a8fb1a1 221inline hashval_t
67f58944 222reduction_hasher::hash (const reduction_info *a)
a509ebb5 223{
5d1fd1de 224 return a->reduc_version;
a509ebb5
RL
225}
226
c203e8a7 227typedef hash_table<reduction_hasher> reduction_info_table_type;
4a8fb1a1
LC
228
229
a509ebb5 230static struct reduction_info *
c203e8a7 231reduction_phi (reduction_info_table_type *reduction_list, gimple phi)
a509ebb5
RL
232{
233 struct reduction_info tmpred, *red;
234
c203e8a7 235 if (reduction_list->elements () == 0 || phi == NULL)
a509ebb5
RL
236 return NULL;
237
238 tmpred.reduc_phi = phi;
5d1fd1de 239 tmpred.reduc_version = gimple_uid (phi);
c203e8a7 240 red = reduction_list->find (&tmpred);
a509ebb5
RL
241
242 return red;
243}
244
5f40b3cb
ZD
245/* Element of hashtable of names to copy. */
246
247struct name_to_copy_elt
248{
249 unsigned version; /* The version of the name to copy. */
250 tree new_name; /* The new name used in the copy. */
251 tree field; /* The field of the structure used to pass the
252 value. */
253};
254
4a8fb1a1 255/* Name copies hashtable helpers. */
5f40b3cb 256
95fbe13e 257struct name_to_copy_hasher : free_ptr_hash <name_to_copy_elt>
5f40b3cb 258{
67f58944
TS
259 static inline hashval_t hash (const name_to_copy_elt *);
260 static inline bool equal (const name_to_copy_elt *, const name_to_copy_elt *);
4a8fb1a1
LC
261};
262
263/* Equality and hash functions for hashtab code. */
5f40b3cb 264
4a8fb1a1 265inline bool
67f58944 266name_to_copy_hasher::equal (const name_to_copy_elt *a, const name_to_copy_elt *b)
4a8fb1a1 267{
5f40b3cb
ZD
268 return a->version == b->version;
269}
270
4a8fb1a1 271inline hashval_t
67f58944 272name_to_copy_hasher::hash (const name_to_copy_elt *a)
5f40b3cb 273{
5f40b3cb
ZD
274 return (hashval_t) a->version;
275}
276
c203e8a7 277typedef hash_table<name_to_copy_hasher> name_to_copy_table_type;
4a8fb1a1 278
b305e3da
SP
279/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
280 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
281 represents the denominator for every element in the matrix. */
282typedef struct lambda_trans_matrix_s
283{
284 lambda_matrix matrix;
285 int rowsize;
286 int colsize;
287 int denominator;
288} *lambda_trans_matrix;
289#define LTM_MATRIX(T) ((T)->matrix)
290#define LTM_ROWSIZE(T) ((T)->rowsize)
291#define LTM_COLSIZE(T) ((T)->colsize)
292#define LTM_DENOMINATOR(T) ((T)->denominator)
293
294/* Allocate a new transformation matrix. */
295
296static lambda_trans_matrix
297lambda_trans_matrix_new (int colsize, int rowsize,
298 struct obstack * lambda_obstack)
299{
300 lambda_trans_matrix ret;
301
302 ret = (lambda_trans_matrix)
303 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
304 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
305 LTM_ROWSIZE (ret) = rowsize;
306 LTM_COLSIZE (ret) = colsize;
307 LTM_DENOMINATOR (ret) = 1;
308 return ret;
309}
310
311/* Multiply a vector VEC by a matrix MAT.
312 MAT is an M*N matrix, and VEC is a vector with length N. The result
313 is stored in DEST which must be a vector of length M. */
314
315static void
316lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
317 lambda_vector vec, lambda_vector dest)
318{
319 int i, j;
320
321 lambda_vector_clear (dest, m);
322 for (i = 0; i < m; i++)
323 for (j = 0; j < n; j++)
324 dest[i] += matrix[i][j] * vec[j];
325}
326
327/* Return true if TRANS is a legal transformation matrix that respects
328 the dependence vectors in DISTS and DIRS. The conservative answer
329 is false.
330
331 "Wolfe proves that a unimodular transformation represented by the
332 matrix T is legal when applied to a loop nest with a set of
333 lexicographically non-negative distance vectors RDG if and only if
334 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
335 i.e.: if and only if it transforms the lexicographically positive
336 distance vectors to lexicographically positive vectors. Note that
337 a unimodular matrix must transform the zero vector (and only it) to
338 the zero vector." S.Muchnick. */
339
340static bool
341lambda_transform_legal_p (lambda_trans_matrix trans,
342 int nb_loops,
9771b263 343 vec<ddr_p> dependence_relations)
b305e3da
SP
344{
345 unsigned int i, j;
346 lambda_vector distres;
347 struct data_dependence_relation *ddr;
348
349 gcc_assert (LTM_COLSIZE (trans) == nb_loops
350 && LTM_ROWSIZE (trans) == nb_loops);
351
352 /* When there are no dependences, the transformation is correct. */
9771b263 353 if (dependence_relations.length () == 0)
b305e3da
SP
354 return true;
355
9771b263 356 ddr = dependence_relations[0];
b305e3da
SP
357 if (ddr == NULL)
358 return true;
359
360 /* When there is an unknown relation in the dependence_relations, we
361 know that it is no worth looking at this loop nest: give up. */
362 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
363 return false;
364
365 distres = lambda_vector_new (nb_loops);
366
367 /* For each distance vector in the dependence graph. */
9771b263 368 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
b305e3da
SP
369 {
370 /* Don't care about relations for which we know that there is no
371 dependence, nor about read-read (aka. output-dependences):
372 these data accesses can happen in any order. */
373 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
374 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
375 continue;
376
377 /* Conservatively answer: "this transformation is not valid". */
378 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
379 return false;
380
381 /* If the dependence could not be captured by a distance vector,
382 conservatively answer that the transform is not valid. */
383 if (DDR_NUM_DIST_VECTS (ddr) == 0)
384 return false;
385
386 /* Compute trans.dist_vect */
387 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
388 {
389 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
390 DDR_DIST_VECT (ddr, j), distres);
391
392 if (!lambda_vector_lexico_pos (distres, nb_loops))
393 return false;
394 }
395 }
396 return true;
397}
08dab97a
RL
398
399/* Data dependency analysis. Returns true if the iterations of LOOP
400 are independent on each other (that is, if we can execute them
401 in parallel). */
5f40b3cb
ZD
402
403static bool
f873b205 404loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 405{
9771b263
DN
406 vec<ddr_p> dependence_relations;
407 vec<data_reference_p> datarefs;
5f40b3cb
ZD
408 lambda_trans_matrix trans;
409 bool ret = false;
5f40b3cb
ZD
410
411 if (dump_file && (dump_flags & TDF_DETAILS))
48710229
RL
412 {
413 fprintf (dump_file, "Considering loop %d\n", loop->num);
414 if (!loop->inner)
415 fprintf (dump_file, "loop is innermost\n");
b8698a0f 416 else
48710229
RL
417 fprintf (dump_file, "loop NOT innermost\n");
418 }
5f40b3cb 419
5f40b3cb
ZD
420 /* Check for problems with dependences. If the loop can be reversed,
421 the iterations are independent. */
00f96dc9 422 auto_vec<loop_p, 3> loop_nest;
9771b263 423 datarefs.create (10);
07687835 424 dependence_relations.create (100);
9ca3d00e
AB
425 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
426 &dependence_relations))
427 {
428 if (dump_file && (dump_flags & TDF_DETAILS))
429 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
430 ret = false;
431 goto end;
432 }
5f40b3cb
ZD
433 if (dump_file && (dump_flags & TDF_DETAILS))
434 dump_data_dependence_relations (dump_file, dependence_relations);
435
f873b205 436 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
5f40b3cb
ZD
437 LTM_MATRIX (trans)[0][0] = -1;
438
439 if (lambda_transform_legal_p (trans, 1, dependence_relations))
440 {
441 ret = true;
442 if (dump_file && (dump_flags & TDF_DETAILS))
443 fprintf (dump_file, " SUCCESS: may be parallelized\n");
444 }
445 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
446 fprintf (dump_file,
447 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 448
9ca3d00e 449 end:
5f40b3cb
ZD
450 free_dependence_relations (dependence_relations);
451 free_data_refs (datarefs);
452
453 return ret;
454}
455
1d4af1e8
SP
456/* Return true when LOOP contains basic blocks marked with the
457 BB_IRREDUCIBLE_LOOP flag. */
458
459static inline bool
460loop_has_blocks_with_irreducible_flag (struct loop *loop)
461{
462 unsigned i;
463 basic_block *bbs = get_loop_body_in_dom_order (loop);
464 bool res = true;
465
466 for (i = 0; i < loop->num_nodes; i++)
467 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
468 goto end;
469
470 res = false;
471 end:
472 free (bbs);
473 return res;
474}
475
8a171a59 476/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 477 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 478 to their addresses that can be reused. The address of OBJ is known to
cba1eb61
JJ
479 be invariant in the whole function. Other needed statements are placed
480 right before GSI. */
5f40b3cb
ZD
481
482static tree
4a8fb1a1 483take_address_of (tree obj, tree type, edge entry,
c203e8a7 484 int_tree_htab_type *decl_address, gimple_stmt_iterator *gsi)
5f40b3cb 485{
8a171a59 486 int uid;
83d5977e 487 tree *var_p, name, addr;
538dd0b7 488 gassign *stmt;
726a989a 489 gimple_seq stmts;
5f40b3cb 490
8a171a59
ZD
491 /* Since the address of OBJ is invariant, the trees may be shared.
492 Avoid rewriting unrelated parts of the code. */
493 obj = unshare_expr (obj);
494 for (var_p = &obj;
495 handled_component_p (*var_p);
496 var_p = &TREE_OPERAND (*var_p, 0))
497 continue;
8a171a59 498
c9a410f0
RG
499 /* Canonicalize the access to base on a MEM_REF. */
500 if (DECL_P (*var_p))
501 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
502
503 /* Assign a canonical SSA name to the address of the base decl used
504 in the address and share it for all accesses and addresses based
505 on it. */
506 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
84baa4b9
TS
507 int_tree_map elt;
508 elt.uid = uid;
509 int_tree_map *slot = decl_address->find_slot (elt, INSERT);
510 if (!slot->to)
5f40b3cb 511 {
cba1eb61
JJ
512 if (gsi == NULL)
513 return NULL;
c9a410f0 514 addr = TREE_OPERAND (*var_p, 0);
29b89442
JJ
515 const char *obj_name
516 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
517 if (obj_name)
518 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
519 else
b731b390 520 name = make_ssa_name (TREE_TYPE (addr));
83d5977e 521 stmt = gimple_build_assign (name, addr);
726a989a 522 gsi_insert_on_edge_immediate (entry, stmt);
5f40b3cb 523
84baa4b9
TS
524 slot->uid = uid;
525 slot->to = name;
5f40b3cb 526 }
8a171a59 527 else
84baa4b9 528 name = slot->to;
5f40b3cb 529
c9a410f0
RG
530 /* Express the address in terms of the canonical SSA name. */
531 TREE_OPERAND (*var_p, 0) = name;
cba1eb61
JJ
532 if (gsi == NULL)
533 return build_fold_addr_expr_with_type (obj, type);
534
c9a410f0
RG
535 name = force_gimple_operand (build_addr (obj, current_function_decl),
536 &stmts, true, NULL_TREE);
537 if (!gimple_seq_empty_p (stmts))
cba1eb61 538 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 539
c9a410f0 540 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 541 {
726a989a 542 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 543 NULL_TREE);
726a989a 544 if (!gimple_seq_empty_p (stmts))
cba1eb61 545 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 546 }
5f40b3cb
ZD
547
548 return name;
549}
550
a509ebb5 551/* Callback for htab_traverse. Create the initialization statement
b8698a0f 552 for reduction described in SLOT, and place it at the preheader of
a509ebb5
RL
553 the loop described in DATA. */
554
4a8fb1a1
LC
555int
556initialize_reductions (reduction_info **slot, struct loop *loop)
a509ebb5 557{
a509ebb5 558 tree init, c;
a509ebb5
RL
559 tree bvar, type, arg;
560 edge e;
561
4a8fb1a1 562 struct reduction_info *const reduc = *slot;
a509ebb5 563
b8698a0f 564 /* Create initialization in preheader:
a509ebb5
RL
565 reduction_variable = initialization value of reduction. */
566
b8698a0f 567 /* In the phi node at the header, replace the argument coming
a509ebb5
RL
568 from the preheader with the reduction initialization value. */
569
570 /* Create a new variable to initialize the reduction. */
571 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
572 bvar = create_tmp_var (type, "reduction");
a509ebb5 573
c2255bc4
AH
574 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
575 OMP_CLAUSE_REDUCTION);
a509ebb5 576 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
726a989a 577 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
a509ebb5
RL
578
579 init = omp_reduction_init (c, TREE_TYPE (bvar));
580 reduc->init = init;
581
b8698a0f
L
582 /* Replace the argument representing the initialization value
583 with the initialization value for the reduction (neutral
584 element for the particular operation, e.g. 0 for PLUS_EXPR,
585 1 for MULT_EXPR, etc).
586 Keep the old value in a new variable "reduction_initial",
587 that will be taken in consideration after the parallel
0eb7e7aa 588 computing is done. */
a509ebb5
RL
589
590 e = loop_preheader_edge (loop);
591 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
592 /* Create new variable to hold the initial value. */
a509ebb5 593
a509ebb5 594 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 595 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 596 reduc->initial_value = arg;
a509ebb5
RL
597 return 1;
598}
5f40b3cb
ZD
599
600struct elv_data
601{
726a989a 602 struct walk_stmt_info info;
9f9f72aa 603 edge entry;
c203e8a7 604 int_tree_htab_type *decl_address;
cba1eb61 605 gimple_stmt_iterator *gsi;
5f40b3cb 606 bool changed;
cba1eb61 607 bool reset;
5f40b3cb
ZD
608};
609
9f9f72aa
AP
610/* Eliminates references to local variables in *TP out of the single
611 entry single exit region starting at DTA->ENTRY.
612 DECL_ADDRESS contains addresses of the references that had their
613 address taken already. If the expression is changed, CHANGED is
614 set to true. Callback for walk_tree. */
a509ebb5 615
5f40b3cb 616static tree
8a171a59 617eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 618{
3d9a9f94 619 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 620 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
621
622 if (DECL_P (t))
623 {
624 *walk_subtrees = 0;
625
626 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
627 return NULL_TREE;
628
629 type = TREE_TYPE (t);
630 addr_type = build_pointer_type (type);
cba1eb61
JJ
631 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
632 dta->gsi);
633 if (dta->gsi == NULL && addr == NULL_TREE)
634 {
635 dta->reset = true;
636 return NULL_TREE;
637 }
638
70f34814 639 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
640
641 dta->changed = true;
642 return NULL_TREE;
643 }
644
645 if (TREE_CODE (t) == ADDR_EXPR)
646 {
8a171a59
ZD
647 /* ADDR_EXPR may appear in two contexts:
648 -- as a gimple operand, when the address taken is a function invariant
649 -- as gimple rhs, when the resulting address in not a function
650 invariant
651 We do not need to do anything special in the latter case (the base of
652 the memory reference whose address is taken may be replaced in the
653 DECL_P case). The former case is more complicated, as we need to
654 ensure that the new address is still a gimple operand. Thus, it
655 is not sufficient to replace just the base of the memory reference --
656 we need to move the whole computation of the address out of the
657 loop. */
658 if (!is_gimple_val (t))
5f40b3cb
ZD
659 return NULL_TREE;
660
661 *walk_subtrees = 0;
8a171a59
ZD
662 obj = TREE_OPERAND (t, 0);
663 var = get_base_address (obj);
664 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
665 return NULL_TREE;
666
667 addr_type = TREE_TYPE (t);
cba1eb61
JJ
668 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
669 dta->gsi);
670 if (dta->gsi == NULL && addr == NULL_TREE)
671 {
672 dta->reset = true;
673 return NULL_TREE;
674 }
5f40b3cb
ZD
675 *tp = addr;
676
677 dta->changed = true;
678 return NULL_TREE;
679 }
680
726a989a 681 if (!EXPR_P (t))
5f40b3cb
ZD
682 *walk_subtrees = 0;
683
684 return NULL_TREE;
685}
686
cba1eb61 687/* Moves the references to local variables in STMT at *GSI out of the single
9f9f72aa
AP
688 entry single exit region starting at ENTRY. DECL_ADDRESS contains
689 addresses of the references that had their address taken
690 already. */
5f40b3cb
ZD
691
692static void
cba1eb61 693eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
c203e8a7 694 int_tree_htab_type *decl_address)
5f40b3cb
ZD
695{
696 struct elv_data dta;
cba1eb61 697 gimple stmt = gsi_stmt (*gsi);
5f40b3cb 698
726a989a 699 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 700 dta.entry = entry;
5f40b3cb
ZD
701 dta.decl_address = decl_address;
702 dta.changed = false;
cba1eb61 703 dta.reset = false;
5f40b3cb 704
b5b8b0ac 705 if (gimple_debug_bind_p (stmt))
cba1eb61
JJ
706 {
707 dta.gsi = NULL;
708 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
709 eliminate_local_variables_1, &dta.info, NULL);
710 if (dta.reset)
711 {
712 gimple_debug_bind_reset_value (stmt);
713 dta.changed = true;
714 }
715 }
29b89442
JJ
716 else if (gimple_clobber_p (stmt))
717 {
718 stmt = gimple_build_nop ();
719 gsi_replace (gsi, stmt, false);
720 dta.changed = true;
721 }
b5b8b0ac 722 else
cba1eb61
JJ
723 {
724 dta.gsi = gsi;
725 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
726 }
5f40b3cb
ZD
727
728 if (dta.changed)
729 update_stmt (stmt);
730}
731
9f9f72aa
AP
732/* Eliminates the references to local variables from the single entry
733 single exit region between the ENTRY and EXIT edges.
b8698a0f 734
a509ebb5 735 This includes:
b8698a0f
L
736 1) Taking address of a local variable -- these are moved out of the
737 region (and temporary variable is created to hold the address if
a509ebb5 738 necessary).
9f9f72aa 739
5f40b3cb 740 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 741 references. */
5f40b3cb
ZD
742
743static void
9f9f72aa 744eliminate_local_variables (edge entry, edge exit)
5f40b3cb 745{
9f9f72aa 746 basic_block bb;
00f96dc9 747 auto_vec<basic_block, 3> body;
5f40b3cb 748 unsigned i;
726a989a 749 gimple_stmt_iterator gsi;
cba1eb61 750 bool has_debug_stmt = false;
c203e8a7 751 int_tree_htab_type decl_address (10);
9f9f72aa
AP
752 basic_block entry_bb = entry->src;
753 basic_block exit_bb = exit->dest;
5f40b3cb 754
9f9f72aa 755 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 756
9771b263 757 FOR_EACH_VEC_ELT (body, i, bb)
9f9f72aa 758 if (bb != entry_bb && bb != exit_bb)
726a989a 759 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
ddb555ed
JJ
760 if (is_gimple_debug (gsi_stmt (gsi)))
761 {
762 if (gimple_debug_bind_p (gsi_stmt (gsi)))
763 has_debug_stmt = true;
764 }
cba1eb61 765 else
c203e8a7 766 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
cba1eb61
JJ
767
768 if (has_debug_stmt)
9771b263 769 FOR_EACH_VEC_ELT (body, i, bb)
cba1eb61
JJ
770 if (bb != entry_bb && bb != exit_bb)
771 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
772 if (gimple_debug_bind_p (gsi_stmt (gsi)))
c203e8a7 773 eliminate_local_variables_stmt (entry, &gsi, &decl_address);
9f9f72aa
AP
774}
775
776/* Returns true if expression EXPR is not defined between ENTRY and
777 EXIT, i.e. if all its operands are defined outside of the region. */
778
779static bool
780expr_invariant_in_region_p (edge entry, edge exit, tree expr)
781{
782 basic_block entry_bb = entry->src;
783 basic_block exit_bb = exit->dest;
784 basic_block def_bb;
9f9f72aa
AP
785
786 if (is_gimple_min_invariant (expr))
787 return true;
788
789 if (TREE_CODE (expr) == SSA_NAME)
790 {
726a989a 791 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
792 if (def_bb
793 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
794 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
795 return false;
796
797 return true;
798 }
799
726a989a 800 return false;
5f40b3cb
ZD
801}
802
803/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
804 The copies are stored to NAME_COPIES, if NAME was already duplicated,
805 its duplicate stored in NAME_COPIES is returned.
b8698a0f 806
5f40b3cb
ZD
807 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
808 duplicated, storing the copies in DECL_COPIES. */
809
810static tree
c203e8a7
TS
811separate_decls_in_region_name (tree name, name_to_copy_table_type *name_copies,
812 int_tree_htab_type *decl_copies,
813 bool copy_name_p)
5f40b3cb
ZD
814{
815 tree copy, var, var_copy;
816 unsigned idx, uid, nuid;
84baa4b9 817 struct int_tree_map ielt;
5f40b3cb 818 struct name_to_copy_elt elt, *nelt;
4a8fb1a1 819 name_to_copy_elt **slot;
84baa4b9 820 int_tree_map *dslot;
5f40b3cb
ZD
821
822 if (TREE_CODE (name) != SSA_NAME)
823 return name;
824
825 idx = SSA_NAME_VERSION (name);
826 elt.version = idx;
c203e8a7
TS
827 slot = name_copies->find_slot_with_hash (&elt, idx,
828 copy_name_p ? INSERT : NO_INSERT);
5f40b3cb 829 if (slot && *slot)
4a8fb1a1 830 return (*slot)->new_name;
5f40b3cb 831
70b5e7dc
RG
832 if (copy_name_p)
833 {
834 copy = duplicate_ssa_name (name, NULL);
835 nelt = XNEW (struct name_to_copy_elt);
836 nelt->version = idx;
837 nelt->new_name = copy;
838 nelt->field = NULL_TREE;
839 *slot = nelt;
840 }
841 else
842 {
843 gcc_assert (!slot);
844 copy = name;
845 }
846
5f40b3cb 847 var = SSA_NAME_VAR (name);
70b5e7dc
RG
848 if (!var)
849 return copy;
850
5f40b3cb
ZD
851 uid = DECL_UID (var);
852 ielt.uid = uid;
84baa4b9
TS
853 dslot = decl_copies->find_slot_with_hash (ielt, uid, INSERT);
854 if (!dslot->to)
5f40b3cb
ZD
855 {
856 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 857 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
84baa4b9
TS
858 dslot->uid = uid;
859 dslot->to = var_copy;
5f40b3cb
ZD
860
861 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 862 it again. */
5f40b3cb
ZD
863 nuid = DECL_UID (var_copy);
864 ielt.uid = nuid;
84baa4b9
TS
865 dslot = decl_copies->find_slot_with_hash (ielt, nuid, INSERT);
866 gcc_assert (!dslot->to);
867 dslot->uid = nuid;
868 dslot->to = var_copy;
5f40b3cb
ZD
869 }
870 else
84baa4b9 871 var_copy = dslot->to;
5f40b3cb 872
b2ec94d4 873 replace_ssa_name_symbol (copy, var_copy);
5f40b3cb
ZD
874 return copy;
875}
876
9f9f72aa
AP
877/* Finds the ssa names used in STMT that are defined outside the
878 region between ENTRY and EXIT and replaces such ssa names with
879 their duplicates. The duplicates are stored to NAME_COPIES. Base
880 decls of all ssa names used in STMT (including those defined in
881 LOOP) are replaced with the new temporary variables; the
882 replacement decls are stored in DECL_COPIES. */
5f40b3cb
ZD
883
884static void
726a989a 885separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
c203e8a7
TS
886 name_to_copy_table_type *name_copies,
887 int_tree_htab_type *decl_copies)
5f40b3cb
ZD
888{
889 use_operand_p use;
890 def_operand_p def;
891 ssa_op_iter oi;
892 tree name, copy;
893 bool copy_name_p;
894
5f40b3cb 895 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
896 {
897 name = DEF_FROM_PTR (def);
898 gcc_assert (TREE_CODE (name) == SSA_NAME);
9f9f72aa
AP
899 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
900 false);
a509ebb5
RL
901 gcc_assert (copy == name);
902 }
5f40b3cb
ZD
903
904 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
905 {
906 name = USE_FROM_PTR (use);
907 if (TREE_CODE (name) != SSA_NAME)
908 continue;
909
9f9f72aa
AP
910 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
911 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
912 copy_name_p);
a509ebb5
RL
913 SET_USE (use, copy);
914 }
5f40b3cb
ZD
915}
916
b5b8b0ac
AO
917/* Finds the ssa names used in STMT that are defined outside the
918 region between ENTRY and EXIT and replaces such ssa names with
919 their duplicates. The duplicates are stored to NAME_COPIES. Base
920 decls of all ssa names used in STMT (including those defined in
921 LOOP) are replaced with the new temporary variables; the
922 replacement decls are stored in DECL_COPIES. */
923
924static bool
4a8fb1a1 925separate_decls_in_region_debug (gimple stmt,
c203e8a7
TS
926 name_to_copy_table_type *name_copies,
927 int_tree_htab_type *decl_copies)
b5b8b0ac
AO
928{
929 use_operand_p use;
930 ssa_op_iter oi;
931 tree var, name;
932 struct int_tree_map ielt;
933 struct name_to_copy_elt elt;
4a8fb1a1 934 name_to_copy_elt **slot;
84baa4b9 935 int_tree_map *dslot;
b5b8b0ac 936
ddb555ed
JJ
937 if (gimple_debug_bind_p (stmt))
938 var = gimple_debug_bind_get_var (stmt);
939 else if (gimple_debug_source_bind_p (stmt))
940 var = gimple_debug_source_bind_get_var (stmt);
941 else
942 return true;
598e67d7 943 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
4f2a9af8 944 return true;
b5b8b0ac
AO
945 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
946 ielt.uid = DECL_UID (var);
84baa4b9 947 dslot = decl_copies->find_slot_with_hash (ielt, ielt.uid, NO_INSERT);
b5b8b0ac
AO
948 if (!dslot)
949 return true;
ddb555ed 950 if (gimple_debug_bind_p (stmt))
84baa4b9 951 gimple_debug_bind_set_var (stmt, dslot->to);
ddb555ed 952 else if (gimple_debug_source_bind_p (stmt))
84baa4b9 953 gimple_debug_source_bind_set_var (stmt, dslot->to);
b5b8b0ac
AO
954
955 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
956 {
957 name = USE_FROM_PTR (use);
958 if (TREE_CODE (name) != SSA_NAME)
959 continue;
960
961 elt.version = SSA_NAME_VERSION (name);
c203e8a7 962 slot = name_copies->find_slot_with_hash (&elt, elt.version, NO_INSERT);
b5b8b0ac
AO
963 if (!slot)
964 {
965 gimple_debug_bind_reset_value (stmt);
966 update_stmt (stmt);
967 break;
968 }
969
4a8fb1a1 970 SET_USE (use, (*slot)->new_name);
b5b8b0ac
AO
971 }
972
973 return false;
974}
975
0eb7e7aa
RL
976/* Callback for htab_traverse. Adds a field corresponding to the reduction
977 specified in SLOT. The type is passed in DATA. */
978
4a8fb1a1
LC
979int
980add_field_for_reduction (reduction_info **slot, tree type)
a509ebb5 981{
b8698a0f 982
4a8fb1a1 983 struct reduction_info *const red = *slot;
aa06a978
RB
984 tree var = gimple_assign_lhs (red->reduc_stmt);
985 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
986 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
0eb7e7aa
RL
987
988 insert_field_into_struct (type, field);
989
990 red->field = field;
991
992 return 1;
993}
a509ebb5 994
5f40b3cb 995/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 996 described in SLOT. The type is passed in DATA. */
5f40b3cb 997
4a8fb1a1
LC
998int
999add_field_for_name (name_to_copy_elt **slot, tree type)
5f40b3cb 1000{
4a8fb1a1 1001 struct name_to_copy_elt *const elt = *slot;
5f40b3cb 1002 tree name = ssa_name (elt->version);
70b5e7dc
RG
1003 tree field = build_decl (UNKNOWN_LOCATION,
1004 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
1005 TREE_TYPE (name));
5f40b3cb
ZD
1006
1007 insert_field_into_struct (type, field);
1008 elt->field = field;
a509ebb5 1009
5f40b3cb
ZD
1010 return 1;
1011}
1012
b8698a0f
L
1013/* Callback for htab_traverse. A local result is the intermediate result
1014 computed by a single
fa10beec 1015 thread, or the initial value in case no iteration was executed.
b8698a0f
L
1016 This function creates a phi node reflecting these values.
1017 The phi's result will be stored in NEW_PHI field of the
1018 reduction's data structure. */
a509ebb5 1019
4a8fb1a1
LC
1020int
1021create_phi_for_local_result (reduction_info **slot, struct loop *loop)
a509ebb5 1022{
4a8fb1a1 1023 struct reduction_info *const reduc = *slot;
a509ebb5 1024 edge e;
538dd0b7 1025 gphi *new_phi;
a509ebb5
RL
1026 basic_block store_bb;
1027 tree local_res;
f5045c96 1028 source_location locus;
a509ebb5 1029
b8698a0f
L
1030 /* STORE_BB is the block where the phi
1031 should be stored. It is the destination of the loop exit.
726a989a 1032 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
a509ebb5
RL
1033 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1034
1035 /* STORE_BB has two predecessors. One coming from the loop
1036 (the reduction's result is computed at the loop),
b8698a0f
L
1037 and another coming from a block preceding the loop,
1038 when no iterations
1039 are executed (the initial value should be taken). */
a509ebb5
RL
1040 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1041 e = EDGE_PRED (store_bb, 1);
1042 else
1043 e = EDGE_PRED (store_bb, 0);
b731b390 1044 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt));
f5045c96 1045 locus = gimple_location (reduc->reduc_stmt);
a509ebb5 1046 new_phi = create_phi_node (local_res, store_bb);
9e227d60 1047 add_phi_arg (new_phi, reduc->init, e, locus);
726a989a 1048 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
9e227d60 1049 FALLTHRU_EDGE (loop->latch), locus);
a509ebb5
RL
1050 reduc->new_phi = new_phi;
1051
1052 return 1;
1053}
5f40b3cb
ZD
1054
1055struct clsn_data
1056{
1057 tree store;
1058 tree load;
1059
1060 basic_block store_bb;
1061 basic_block load_bb;
1062};
1063
a509ebb5 1064/* Callback for htab_traverse. Create an atomic instruction for the
b8698a0f 1065 reduction described in SLOT.
a509ebb5
RL
1066 DATA annotates the place in memory the atomic operation relates to,
1067 and the basic block it needs to be generated in. */
1068
4a8fb1a1
LC
1069int
1070create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
a509ebb5 1071{
4a8fb1a1 1072 struct reduction_info *const reduc = *slot;
726a989a 1073 gimple_stmt_iterator gsi;
a509ebb5 1074 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
a509ebb5
RL
1075 tree load_struct;
1076 basic_block bb;
1077 basic_block new_bb;
1078 edge e;
0f900dfa 1079 tree t, addr, ref, x;
726a989a
RB
1080 tree tmp_load, name;
1081 gimple load;
a509ebb5 1082
70f34814 1083 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5 1084 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
a509ebb5
RL
1085
1086 addr = build_addr (t, current_function_decl);
1087
1088 /* Create phi node. */
1089 bb = clsn_data->load_bb;
1090
b13c907a
RB
1091 gsi = gsi_last_bb (bb);
1092 e = split_block (bb, gsi_stmt (gsi));
a509ebb5
RL
1093 new_bb = e->dest;
1094
b731b390
JJ
1095 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)));
1096 tmp_load = make_ssa_name (tmp_load);
726a989a 1097 load = gimple_build_omp_atomic_load (tmp_load, addr);
a509ebb5 1098 SSA_NAME_DEF_STMT (tmp_load) = load;
726a989a
RB
1099 gsi = gsi_start_bb (new_bb);
1100 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
a509ebb5
RL
1101
1102 e = split_block (new_bb, load);
1103 new_bb = e->dest;
726a989a 1104 gsi = gsi_start_bb (new_bb);
a509ebb5 1105 ref = tmp_load;
726a989a
RB
1106 x = fold_build2 (reduc->reduction_code,
1107 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1108 PHI_RESULT (reduc->new_phi));
a509ebb5 1109
726a989a
RB
1110 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1111 GSI_CONTINUE_LINKING);
a509ebb5 1112
726a989a 1113 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
a509ebb5
RL
1114 return 1;
1115}
1116
b8698a0f
L
1117/* Create the atomic operation at the join point of the threads.
1118 REDUCTION_LIST describes the reductions in the LOOP.
1119 LD_ST_DATA describes the shared data structure where
a509ebb5
RL
1120 shared data is stored in and loaded from. */
1121static void
4a8fb1a1 1122create_call_for_reduction (struct loop *loop,
c203e8a7 1123 reduction_info_table_type *reduction_list,
a509ebb5
RL
1124 struct clsn_data *ld_st_data)
1125{
c203e8a7 1126 reduction_list->traverse <struct loop *, create_phi_for_local_result> (loop);
726a989a 1127 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
a509ebb5 1128 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
4a8fb1a1 1129 reduction_list
c203e8a7 1130 ->traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
a509ebb5
RL
1131}
1132
ae0bce62
RL
1133/* Callback for htab_traverse. Loads the final reduction value at the
1134 join point of all threads, and inserts it in the right place. */
a509ebb5 1135
4a8fb1a1
LC
1136int
1137create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
a509ebb5 1138{
4a8fb1a1 1139 struct reduction_info *const red = *slot;
726a989a
RB
1140 gimple stmt;
1141 gimple_stmt_iterator gsi;
1142 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
a509ebb5 1143 tree load_struct;
ae0bce62 1144 tree name;
a509ebb5
RL
1145 tree x;
1146
726a989a 1147 gsi = gsi_after_labels (clsn_data->load_bb);
70f34814 1148 load_struct = build_simple_mem_ref (clsn_data->load);
a509ebb5
RL
1149 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1150 NULL_TREE);
a509ebb5 1151
ae0bce62 1152 x = load_struct;
a509ebb5 1153 name = PHI_RESULT (red->keep_res);
726a989a 1154 stmt = gimple_build_assign (name, x);
a509ebb5 1155
726a989a 1156 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1157
726a989a
RB
1158 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1159 !gsi_end_p (gsi); gsi_next (&gsi))
1160 if (gsi_stmt (gsi) == red->keep_res)
1161 {
1162 remove_phi_node (&gsi, false);
1163 return 1;
1164 }
1165 gcc_unreachable ();
a509ebb5
RL
1166}
1167
b8698a0f 1168/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1169 REDUCTION_LIST describes the list of reductions that the
fa10beec 1170 loads should be generated for. */
a509ebb5 1171static void
c203e8a7 1172create_final_loads_for_reduction (reduction_info_table_type *reduction_list,
a509ebb5
RL
1173 struct clsn_data *ld_st_data)
1174{
726a989a 1175 gimple_stmt_iterator gsi;
a509ebb5 1176 tree t;
726a989a 1177 gimple stmt;
a509ebb5 1178
726a989a 1179 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1180 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1181 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1182
726a989a 1183 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
a509ebb5 1184
4a8fb1a1 1185 reduction_list
c203e8a7 1186 ->traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
a509ebb5
RL
1187
1188}
1189
0eb7e7aa
RL
1190/* Callback for htab_traverse. Store the neutral value for the
1191 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1192 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1193 The reduction is specified in SLOT. The store information is
1194 passed in DATA. */
0eb7e7aa 1195
4a8fb1a1
LC
1196int
1197create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
0eb7e7aa 1198{
4a8fb1a1 1199 struct reduction_info *const red = *slot;
726a989a
RB
1200 tree t;
1201 gimple stmt;
1202 gimple_stmt_iterator gsi;
1203 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1204
1205 gsi = gsi_last_bb (clsn_data->store_bb);
1206 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1207 stmt = gimple_build_assign (t, red->initial_value);
726a989a 1208 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1209
1210 return 1;
1211}
1212
a509ebb5
RL
1213/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1214 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1215 specified in SLOT. */
1216
4a8fb1a1
LC
1217int
1218create_loads_and_stores_for_name (name_to_copy_elt **slot,
1219 struct clsn_data *clsn_data)
5f40b3cb 1220{
4a8fb1a1 1221 struct name_to_copy_elt *const elt = *slot;
726a989a
RB
1222 tree t;
1223 gimple stmt;
1224 gimple_stmt_iterator gsi;
5f40b3cb 1225 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1226 tree load_struct;
1227
726a989a
RB
1228 gsi = gsi_last_bb (clsn_data->store_bb);
1229 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1230 stmt = gimple_build_assign (t, ssa_name (elt->version));
726a989a 1231 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1232
726a989a 1233 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1234 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1235 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1236 stmt = gimple_build_assign (elt->new_name, t);
726a989a 1237 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1238
1239 return 1;
1240}
1241
1242/* Moves all the variables used in LOOP and defined outside of it (including
1243 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1244 name) to a structure created for this purpose. The code
b8698a0f 1245
5f40b3cb
ZD
1246 while (1)
1247 {
1248 use (a);
1249 use (b);
1250 }
1251
1252 is transformed this way:
1253
1254 bb0:
1255 old.a = a;
1256 old.b = b;
1257
1258 bb1:
1259 a' = new->a;
1260 b' = new->b;
1261 while (1)
1262 {
1263 use (a');
1264 use (b');
1265 }
1266
1267 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1268 pointer `new' is intentionally not initialized (the loop will be split to a
1269 separate function later, and `new' will be initialized from its arguments).
a509ebb5 1270 LD_ST_DATA holds information about the shared data structure used to pass
b8698a0f
L
1271 information among the threads. It is initialized here, and
1272 gen_parallel_loop will pass it to create_call_for_reduction that
1273 needs this information. REDUCTION_LIST describes the reductions
a509ebb5 1274 in LOOP. */
5f40b3cb
ZD
1275
1276static void
4a8fb1a1 1277separate_decls_in_region (edge entry, edge exit,
c203e8a7 1278 reduction_info_table_type *reduction_list,
b8698a0f 1279 tree *arg_struct, tree *new_arg_struct,
9f9f72aa 1280 struct clsn_data *ld_st_data)
a509ebb5 1281
5f40b3cb 1282{
9f9f72aa 1283 basic_block bb1 = split_edge (entry);
5f40b3cb 1284 basic_block bb0 = single_pred (bb1);
c203e8a7
TS
1285 name_to_copy_table_type name_copies (10);
1286 int_tree_htab_type decl_copies (10);
5f40b3cb 1287 unsigned i;
726a989a
RB
1288 tree type, type_name, nvar;
1289 gimple_stmt_iterator gsi;
5f40b3cb 1290 struct clsn_data clsn_data;
00f96dc9 1291 auto_vec<basic_block, 3> body;
9f9f72aa
AP
1292 basic_block bb;
1293 basic_block entry_bb = bb1;
1294 basic_block exit_bb = exit->dest;
b5b8b0ac 1295 bool has_debug_stmt = false;
5f40b3cb 1296
726a989a 1297 entry = single_succ_edge (entry_bb);
9f9f72aa 1298 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 1299
9771b263 1300 FOR_EACH_VEC_ELT (body, i, bb)
9f9f72aa 1301 {
b8698a0f 1302 if (bb != entry_bb && bb != exit_bb)
9f9f72aa 1303 {
726a989a
RB
1304 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1305 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
c203e8a7 1306 &name_copies, &decl_copies);
726a989a
RB
1307
1308 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
b5b8b0ac
AO
1309 {
1310 gimple stmt = gsi_stmt (gsi);
1311
1312 if (is_gimple_debug (stmt))
1313 has_debug_stmt = true;
1314 else
1315 separate_decls_in_region_stmt (entry, exit, stmt,
c203e8a7 1316 &name_copies, &decl_copies);
b5b8b0ac 1317 }
9f9f72aa 1318 }
5f40b3cb 1319 }
9f9f72aa 1320
b5b8b0ac
AO
1321 /* Now process debug bind stmts. We must not create decls while
1322 processing debug stmts, so we defer their processing so as to
1323 make sure we will have debug info for as many variables as
1324 possible (all of those that were dealt with in the loop above),
1325 and discard those for which we know there's nothing we can
1326 do. */
1327 if (has_debug_stmt)
9771b263 1328 FOR_EACH_VEC_ELT (body, i, bb)
b5b8b0ac
AO
1329 if (bb != entry_bb && bb != exit_bb)
1330 {
1331 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1332 {
1333 gimple stmt = gsi_stmt (gsi);
1334
ddb555ed 1335 if (is_gimple_debug (stmt))
b5b8b0ac 1336 {
c203e8a7
TS
1337 if (separate_decls_in_region_debug (stmt, &name_copies,
1338 &decl_copies))
b5b8b0ac
AO
1339 {
1340 gsi_remove (&gsi, true);
1341 continue;
1342 }
1343 }
1344
1345 gsi_next (&gsi);
1346 }
1347 }
1348
c203e8a7 1349 if (name_copies.elements () == 0 && reduction_list->elements () == 0)
5f40b3cb
ZD
1350 {
1351 /* It may happen that there is nothing to copy (if there are only
a509ebb5 1352 loop carried and external variables in the loop). */
5f40b3cb
ZD
1353 *arg_struct = NULL;
1354 *new_arg_struct = NULL;
1355 }
1356 else
1357 {
1358 /* Create the type for the structure to store the ssa names to. */
1359 type = lang_hooks.types.make_type (RECORD_TYPE);
9ff70652 1360 type_name = build_decl (UNKNOWN_LOCATION,
c2255bc4 1361 TYPE_DECL, create_tmp_var_name (".paral_data"),
5f40b3cb
ZD
1362 type);
1363 TYPE_NAME (type) = type_name;
1364
4a8fb1a1 1365 name_copies.traverse <tree, add_field_for_name> (type);
c203e8a7 1366 if (reduction_list && reduction_list->elements () > 0)
0eb7e7aa
RL
1367 {
1368 /* Create the fields for reductions. */
c203e8a7 1369 reduction_list->traverse <tree, add_field_for_reduction> (type);
0eb7e7aa 1370 }
5f40b3cb 1371 layout_type (type);
b8698a0f 1372
5f40b3cb
ZD
1373 /* Create the loads and stores. */
1374 *arg_struct = create_tmp_var (type, ".paral_data_store");
5f40b3cb 1375 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
b731b390 1376 *new_arg_struct = make_ssa_name (nvar);
5f40b3cb 1377
a509ebb5
RL
1378 ld_st_data->store = *arg_struct;
1379 ld_st_data->load = *new_arg_struct;
1380 ld_st_data->store_bb = bb0;
1381 ld_st_data->load_bb = bb1;
0eb7e7aa 1382
4a8fb1a1
LC
1383 name_copies
1384 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1385 (ld_st_data);
a509ebb5 1386
ae0bce62
RL
1387 /* Load the calculation from memory (after the join of the threads). */
1388
c203e8a7 1389 if (reduction_list && reduction_list->elements () > 0)
a509ebb5 1390 {
4a8fb1a1 1391 reduction_list
c203e8a7
TS
1392 ->traverse <struct clsn_data *, create_stores_for_reduction>
1393 (ld_st_data);
b731b390 1394 clsn_data.load = make_ssa_name (nvar);
9f9f72aa 1395 clsn_data.load_bb = exit->dest;
a509ebb5
RL
1396 clsn_data.store = ld_st_data->store;
1397 create_final_loads_for_reduction (reduction_list, &clsn_data);
1398 }
5f40b3cb 1399 }
5f40b3cb
ZD
1400}
1401
a79b7ec5 1402/* Returns true if FN was created to run in parallel. */
5f40b3cb 1403
62e0a1ed 1404bool
a79b7ec5 1405parallelized_function_p (tree fndecl)
5f40b3cb 1406{
a79b7ec5
TV
1407 cgraph_node *node = cgraph_node::get (fndecl);
1408 gcc_assert (node != NULL);
1409 return node->parallelized_function;
5f40b3cb
ZD
1410}
1411
1412/* Creates and returns an empty function that will receive the body of
1413 a parallelized loop. */
1414
1415static tree
9ff70652 1416create_loop_fn (location_t loc)
5f40b3cb
ZD
1417{
1418 char buf[100];
1419 char *tname;
1420 tree decl, type, name, t;
1421 struct function *act_cfun = cfun;
1422 static unsigned loopfn_num;
1423
5368224f 1424 loc = LOCATION_LOCUS (loc);
5f40b3cb
ZD
1425 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1426 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1427 clean_symbol_name (tname);
1428 name = get_identifier (tname);
1429 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1430
9ff70652 1431 decl = build_decl (loc, FUNCTION_DECL, name, type);
5f40b3cb
ZD
1432 TREE_STATIC (decl) = 1;
1433 TREE_USED (decl) = 1;
1434 DECL_ARTIFICIAL (decl) = 1;
1435 DECL_IGNORED_P (decl) = 0;
1436 TREE_PUBLIC (decl) = 0;
1437 DECL_UNINLINABLE (decl) = 1;
1438 DECL_EXTERNAL (decl) = 0;
1439 DECL_CONTEXT (decl) = NULL_TREE;
1440 DECL_INITIAL (decl) = make_node (BLOCK);
1441
9ff70652 1442 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
5f40b3cb
ZD
1443 DECL_ARTIFICIAL (t) = 1;
1444 DECL_IGNORED_P (t) = 1;
1445 DECL_RESULT (decl) = t;
1446
9ff70652 1447 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
5f40b3cb
ZD
1448 ptr_type_node);
1449 DECL_ARTIFICIAL (t) = 1;
1450 DECL_ARG_TYPE (t) = ptr_type_node;
1451 DECL_CONTEXT (t) = decl;
1452 TREE_USED (t) = 1;
1453 DECL_ARGUMENTS (decl) = t;
1454
182e0d71 1455 allocate_struct_function (decl, false);
5f40b3cb
ZD
1456
1457 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1458 it. */
5576d6f2 1459 set_cfun (act_cfun);
5f40b3cb
ZD
1460
1461 return decl;
1462}
1463
7c82d827
TV
1464/* Replace uses of NAME by VAL in block BB. */
1465
1466static void
1467replace_uses_in_bb_by (tree name, tree val, basic_block bb)
1468{
1469 gimple use_stmt;
1470 imm_use_iterator imm_iter;
1471
1472 FOR_EACH_IMM_USE_STMT (use_stmt, imm_iter, name)
1473 {
1474 if (gimple_bb (use_stmt) != bb)
1475 continue;
1476
1477 use_operand_p use_p;
1478 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
1479 SET_USE (use_p, val);
1480 }
1481}
1482
7c82d827
TV
1483/* Do transformation from:
1484
1485 <bb preheader>:
1486 ...
1487 goto <bb header>
1488
1489 <bb header>:
1490 ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1491 sum_a = PHI <sum_init (preheader), sum_b (latch)>
1492 ...
1493 use (ivtmp_a)
1494 ...
1495 sum_b = sum_a + sum_update
1496 ...
1497 if (ivtmp_a < n)
1498 goto <bb latch>;
1499 else
1500 goto <bb exit>;
1501
1502 <bb latch>:
1503 ivtmp_b = ivtmp_a + 1;
1504 goto <bb header>
1505
1506 <bb exit>:
712cb0bb 1507 sum_z = PHI <sum_b (cond[1]), ...>
7c82d827
TV
1508
1509 [1] Where <bb cond> is single_pred (bb latch); In the simplest case,
1510 that's <bb header>.
1511
1512 to:
1513
1514 <bb preheader>:
1515 ...
1516 goto <bb newheader>
1517
1518 <bb header>:
1519 ivtmp_a = PHI <ivtmp_c (latch)>
1520 sum_a = PHI <sum_c (latch)>
1521 ...
1522 use (ivtmp_a)
1523 ...
1524 sum_b = sum_a + sum_update
1525 ...
1526 goto <bb latch>;
1527
1528 <bb newheader>:
1529 ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1530 sum_c = PHI <sum_init (preheader), sum_b (latch)>
1531 if (ivtmp_c < n + 1)
1532 goto <bb header>;
1533 else
712cb0bb 1534 goto <bb newexit>;
7c82d827
TV
1535
1536 <bb latch>:
1537 ivtmp_b = ivtmp_a + 1;
1538 goto <bb newheader>
1539
712cb0bb
TV
1540 <bb newexit>:
1541 sum_y = PHI <sum_c (newheader)>
1542
7c82d827 1543 <bb exit>:
712cb0bb 1544 sum_z = PHI <sum_y (newexit), ...>
7c82d827
TV
1545
1546
1547 In unified diff format:
1548
1549 <bb preheader>:
1550 ...
1551- goto <bb header>
1552+ goto <bb newheader>
1553
1554 <bb header>:
1555- ivtmp_a = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1556- sum_a = PHI <sum_init (preheader), sum_b (latch)>
1557+ ivtmp_a = PHI <ivtmp_c (latch)>
1558+ sum_a = PHI <sum_c (latch)>
1559 ...
1560 use (ivtmp_a)
1561 ...
1562 sum_b = sum_a + sum_update
1563 ...
1564- if (ivtmp_a < n)
1565- goto <bb latch>;
1566+ goto <bb latch>;
1567+
1568+ <bb newheader>:
1569+ ivtmp_c = PHI <ivtmp_init (preheader), ivtmp_b (latch)>
1570+ sum_c = PHI <sum_init (preheader), sum_b (latch)>
1571+ if (ivtmp_c < n + 1)
1572+ goto <bb header>;
1573 else
1574 goto <bb exit>;
1575
1576 <bb latch>:
1577 ivtmp_b = ivtmp_a + 1;
1578- goto <bb header>
1579+ goto <bb newheader>
1580
712cb0bb
TV
1581+ <bb newexit>:
1582+ sum_y = PHI <sum_c (newheader)>
1583
7c82d827 1584 <bb exit>:
712cb0bb
TV
1585- sum_z = PHI <sum_b (cond[1]), ...>
1586+ sum_z = PHI <sum_y (newexit), ...>
7c82d827
TV
1587
1588 Note: the example does not show any virtual phis, but these are handled more
1589 or less as reductions.
b8698a0f 1590
7c82d827
TV
1591
1592 Moves the exit condition of LOOP to the beginning of its header.
1593 REDUCTION_LIST describes the reductions in LOOP. BOUND is the new loop
1594 bound. */
1595
1596static void
1597transform_to_exit_first_loop_alt (struct loop *loop,
1598 reduction_info_table_type *reduction_list,
1599 tree bound)
1600{
1601 basic_block header = loop->header;
1602 basic_block latch = loop->latch;
1603 edge exit = single_dom_exit (loop);
1604 basic_block exit_block = exit->dest;
1605 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1606 tree control = gimple_cond_lhs (cond_stmt);
1607 edge e;
1608
338392ed
TV
1609 /* Rewriting virtuals into loop-closed ssa normal form makes this
1610 transformation simpler. It also ensures that the virtuals are in
1611 loop-closed ssa normal from after the transformation, which is required by
1612 create_parallel_loop. */
1613 rewrite_virtuals_into_loop_closed_ssa (loop);
7c82d827
TV
1614
1615 /* Create the new_header block. */
1616 basic_block new_header = split_block_before_cond_jump (exit->src);
712cb0bb 1617 edge edge_at_split = single_pred_edge (new_header);
7c82d827
TV
1618
1619 /* Redirect entry edge to new_header. */
1620 edge entry = loop_preheader_edge (loop);
1621 e = redirect_edge_and_branch (entry, new_header);
1622 gcc_assert (e == entry);
1623
1624 /* Redirect post_inc_edge to new_header. */
1625 edge post_inc_edge = single_succ_edge (latch);
1626 e = redirect_edge_and_branch (post_inc_edge, new_header);
1627 gcc_assert (e == post_inc_edge);
1628
1629 /* Redirect post_cond_edge to header. */
1630 edge post_cond_edge = single_pred_edge (latch);
1631 e = redirect_edge_and_branch (post_cond_edge, header);
1632 gcc_assert (e == post_cond_edge);
1633
712cb0bb
TV
1634 /* Redirect edge_at_split to latch. */
1635 e = redirect_edge_and_branch (edge_at_split, latch);
1636 gcc_assert (e == edge_at_split);
7c82d827
TV
1637
1638 /* Set the new loop bound. */
1639 gimple_cond_set_rhs (cond_stmt, bound);
5a5fd951 1640 update_stmt (cond_stmt);
7c82d827
TV
1641
1642 /* Repair the ssa. */
1643 vec<edge_var_map> *v = redirect_edge_var_map_vector (post_inc_edge);
1644 edge_var_map *vm;
1645 gphi_iterator gsi;
338392ed 1646 int i;
7c82d827
TV
1647 for (gsi = gsi_start_phis (header), i = 0;
1648 !gsi_end_p (gsi) && v->iterate (i, &vm);
1649 gsi_next (&gsi), i++)
1650 {
1651 gphi *phi = gsi.phi ();
1652 tree res_a = PHI_RESULT (phi);
1653
1654 /* Create new phi. */
1655 tree res_c = copy_ssa_name (res_a, phi);
1656 gphi *nphi = create_phi_node (res_c, new_header);
1657
1658 /* Replace ivtmp_a with ivtmp_c in condition 'if (ivtmp_a < n)'. */
1659 replace_uses_in_bb_by (res_a, res_c, new_header);
1660
1661 /* Replace ivtmp/sum_b with ivtmp/sum_c in header phi. */
1662 add_phi_arg (phi, res_c, post_cond_edge, UNKNOWN_LOCATION);
1663
338392ed 1664 /* Replace sum_b with sum_c in exit phi. */
7c82d827 1665 tree res_b = redirect_edge_var_map_def (vm);
338392ed 1666 replace_uses_in_bb_by (res_b, res_c, exit_block);
7c82d827
TV
1667
1668 struct reduction_info *red = reduction_phi (reduction_list, phi);
1669 gcc_assert (virtual_operand_p (res_a)
1670 || res_a == control
1671 || red != NULL);
1672
1673 if (red)
1674 {
1675 /* Register the new reduction phi. */
1676 red->reduc_phi = nphi;
1677 gimple_set_uid (red->reduc_phi, red->reduc_version);
1678 }
1679 }
1680 gcc_assert (gsi_end_p (gsi) && !v->iterate (i, &vm));
7c82d827
TV
1681
1682 /* Set the preheader argument of the new phis to ivtmp/sum_init. */
1683 flush_pending_stmts (entry);
1684
1685 /* Set the latch arguments of the new phis to ivtmp/sum_b. */
1686 flush_pending_stmts (post_inc_edge);
1687
712cb0bb
TV
1688 /* Create a new empty exit block, inbetween the new loop header and the old
1689 exit block. The function separate_decls_in_region needs this block to
1690 insert code that is active on loop exit, but not any other path. */
1691 basic_block new_exit_block = split_edge (exit);
1692
1693 /* Insert and register the reduction exit phis. */
7c82d827
TV
1694 for (gphi_iterator gsi = gsi_start_phis (exit_block);
1695 !gsi_end_p (gsi);
1696 gsi_next (&gsi))
1697 {
1698 gphi *phi = gsi.phi ();
1699 tree res_z = PHI_RESULT (phi);
712cb0bb
TV
1700
1701 /* Now that we have a new exit block, duplicate the phi of the old exit
1702 block in the new exit block to preserve loop-closed ssa. */
1703 edge succ_new_exit_block = single_succ_edge (new_exit_block);
1704 edge pred_new_exit_block = single_pred_edge (new_exit_block);
1705 tree res_y = copy_ssa_name (res_z, phi);
1706 gphi *nphi = create_phi_node (res_y, new_exit_block);
1707 tree res_c = PHI_ARG_DEF_FROM_EDGE (phi, succ_new_exit_block);
1708 add_phi_arg (nphi, res_c, pred_new_exit_block, UNKNOWN_LOCATION);
1709 add_phi_arg (phi, res_y, succ_new_exit_block, UNKNOWN_LOCATION);
1710
7c82d827
TV
1711 if (virtual_operand_p (res_z))
1712 continue;
1713
7c82d827
TV
1714 gimple reduc_phi = SSA_NAME_DEF_STMT (res_c);
1715 struct reduction_info *red = reduction_phi (reduction_list, reduc_phi);
1716 if (red != NULL)
712cb0bb 1717 red->keep_res = nphi;
7c82d827
TV
1718 }
1719
1720 /* We're going to cancel the loop at the end of gen_parallel_loop, but until
1721 then we're still using some fields, so only bother about fields that are
1722 still used: header and latch.
1723 The loop has a new header bb, so we update it. The latch bb stays the
1724 same. */
1725 loop->header = new_header;
1726
1727 /* Recalculate dominance info. */
1728 free_dominance_info (CDI_DOMINATORS);
1729 calculate_dominance_info (CDI_DOMINATORS);
1730}
1731
1732/* Tries to moves the exit condition of LOOP to the beginning of its header
1733 without duplication of the loop body. NIT is the number of iterations of the
1734 loop. REDUCTION_LIST describes the reductions in LOOP. Return true if
1735 transformation is successful. */
1736
1737static bool
1738try_transform_to_exit_first_loop_alt (struct loop *loop,
1739 reduction_info_table_type *reduction_list,
1740 tree nit)
1741{
1742 /* Check whether the latch contains a single statement. */
1b7f61eb
TV
1743 if (!gimple_seq_nondebug_singleton_p (bb_seq (loop->latch)))
1744 return false;
7c82d827
TV
1745
1746 /* Check whether the latch contains the loop iv increment. */
1747 edge back = single_succ_edge (loop->latch);
1748 edge exit = single_dom_exit (loop);
1749 gcond *cond_stmt = as_a <gcond *> (last_stmt (exit->src));
1750 tree control = gimple_cond_lhs (cond_stmt);
1751 gphi *phi = as_a <gphi *> (SSA_NAME_DEF_STMT (control));
1752 tree inc_res = gimple_phi_arg_def (phi, back->dest_idx);
1753 if (gimple_bb (SSA_NAME_DEF_STMT (inc_res)) != loop->latch)
1754 return false;
1755
1756 /* Check whether there's no code between the loop condition and the latch. */
1757 if (!single_pred_p (loop->latch)
1758 || single_pred (loop->latch) != exit->src)
1759 return false;
1760
1761 tree alt_bound = NULL_TREE;
1762 tree nit_type = TREE_TYPE (nit);
1763
1764 /* Figure out whether nit + 1 overflows. */
1765 if (TREE_CODE (nit) == INTEGER_CST)
1766 {
1767 if (!tree_int_cst_equal (nit, TYPE_MAXVAL (nit_type)))
1768 {
1769 alt_bound = fold_build2_loc (UNKNOWN_LOCATION, PLUS_EXPR, nit_type,
1770 nit, build_one_cst (nit_type));
1771
1772 gcc_assert (TREE_CODE (alt_bound) == INTEGER_CST);
fd7b3ef5
TV
1773 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1774 return true;
7c82d827
TV
1775 }
1776 else
1777 {
1778 /* Todo: Figure out if we can trigger this, if it's worth to handle
1779 optimally, and if we can handle it optimally. */
fd7b3ef5 1780 return false;
7c82d827
TV
1781 }
1782 }
7c82d827 1783
fd7b3ef5 1784 gcc_assert (TREE_CODE (nit) == SSA_NAME);
7c82d827 1785
4f75d608
TV
1786 /* Variable nit is the loop bound as returned by canonicalize_loop_ivs, for an
1787 iv with base 0 and step 1 that is incremented in the latch, like this:
1788
1789 <bb header>:
1790 # iv_1 = PHI <0 (preheader), iv_2 (latch)>
1791 ...
1792 if (iv_1 < nit)
1793 goto <bb latch>;
1794 else
1795 goto <bb exit>;
1796
1797 <bb latch>:
1798 iv_2 = iv_1 + 1;
1799 goto <bb header>;
1800
1801 The range of iv_1 is [0, nit]. The latch edge is taken for
1802 iv_1 == [0, nit - 1] and the exit edge is taken for iv_1 == nit. So the
1803 number of latch executions is equal to nit.
1804
1805 The function max_loop_iterations gives us the maximum number of latch
1806 executions, so it gives us the maximum value of nit. */
1807 widest_int nit_max;
1808 if (!max_loop_iterations (loop, &nit_max))
1809 return false;
1810
1811 /* Check if nit + 1 overflows. */
1812 widest_int type_max = wi::to_widest (TYPE_MAXVAL (nit_type));
1813 if (!wi::lts_p (nit_max, type_max))
1814 return false;
1815
fd7b3ef5 1816 gimple def = SSA_NAME_DEF_STMT (nit);
7c82d827 1817
4f75d608 1818 /* Try to find nit + 1, in the form of n in an assignment nit = n - 1. */
fd7b3ef5
TV
1819 if (def
1820 && is_gimple_assign (def)
1821 && gimple_assign_rhs_code (def) == PLUS_EXPR)
1822 {
1823 tree op1 = gimple_assign_rhs1 (def);
1824 tree op2 = gimple_assign_rhs2 (def);
1825 if (integer_minus_onep (op1))
1826 alt_bound = op2;
1827 else if (integer_minus_onep (op2))
1828 alt_bound = op1;
7c82d827
TV
1829 }
1830
9f620bf1 1831 /* If not found, insert nit + 1. */
7c82d827 1832 if (alt_bound == NULL_TREE)
9f620bf1
TV
1833 {
1834 alt_bound = fold_build2 (PLUS_EXPR, nit_type, nit,
1835 build_int_cst_type (nit_type, 1));
1836
1837 gimple_stmt_iterator gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
1838
1839 alt_bound
1840 = force_gimple_operand_gsi (&gsi, alt_bound, true, NULL_TREE, false,
1841 GSI_CONTINUE_LINKING);
1842 }
7c82d827
TV
1843
1844 transform_to_exit_first_loop_alt (loop, reduction_list, alt_bound);
1845 return true;
1846}
1847
1848/* Moves the exit condition of LOOP to the beginning of its header. NIT is the
1849 number of iterations of the loop. REDUCTION_LIST describes the reductions in
1850 LOOP. */
5f40b3cb
ZD
1851
1852static void
4a8fb1a1 1853transform_to_exit_first_loop (struct loop *loop,
c203e8a7 1854 reduction_info_table_type *reduction_list,
4a8fb1a1 1855 tree nit)
5f40b3cb
ZD
1856{
1857 basic_block *bbs, *nbbs, ex_bb, orig_header;
1858 unsigned n;
1859 bool ok;
1860 edge exit = single_dom_exit (loop), hpred;
726a989a 1861 tree control, control_name, res, t;
538dd0b7
DM
1862 gphi *phi, *nphi;
1863 gassign *stmt;
1864 gcond *cond_stmt, *cond_nit;
48710229 1865 tree nit_1;
5f40b3cb
ZD
1866
1867 split_block_after_labels (loop->header);
1868 orig_header = single_succ (loop->header);
1869 hpred = single_succ_edge (loop->header);
1870
538dd0b7 1871 cond_stmt = as_a <gcond *> (last_stmt (exit->src));
726a989a
RB
1872 control = gimple_cond_lhs (cond_stmt);
1873 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
5f40b3cb
ZD
1874
1875 /* Make sure that we have phi nodes on exit for all loop header phis
1876 (create_parallel_loop requires that). */
538dd0b7
DM
1877 for (gphi_iterator gsi = gsi_start_phis (loop->header);
1878 !gsi_end_p (gsi);
1879 gsi_next (&gsi))
5f40b3cb 1880 {
538dd0b7 1881 phi = gsi.phi ();
5f40b3cb 1882 res = PHI_RESULT (phi);
070ecdfd 1883 t = copy_ssa_name (res, phi);
5f40b3cb 1884 SET_PHI_RESULT (phi, t);
5f40b3cb 1885 nphi = create_phi_node (res, orig_header);
9e227d60 1886 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
5f40b3cb
ZD
1887
1888 if (res == control)
1889 {
726a989a 1890 gimple_cond_set_lhs (cond_stmt, t);
5f40b3cb
ZD
1891 update_stmt (cond_stmt);
1892 control = t;
1893 }
1894 }
12037899 1895
5f40b3cb 1896 bbs = get_loop_body_in_dom_order (loop);
48710229 1897
69958396
RL
1898 for (n = 0; bbs[n] != exit->src; n++)
1899 continue;
5f40b3cb 1900 nbbs = XNEWVEC (basic_block, n);
726a989a
RB
1901 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1902 bbs + 1, n, nbbs);
5f40b3cb
ZD
1903 gcc_assert (ok);
1904 free (bbs);
1905 ex_bb = nbbs[0];
1906 free (nbbs);
1907
b8698a0f 1908 /* Other than reductions, the only gimple reg that should be copied
726a989a 1909 out of the loop is the control variable. */
69958396 1910 exit = single_dom_exit (loop);
5f40b3cb 1911 control_name = NULL_TREE;
538dd0b7
DM
1912 for (gphi_iterator gsi = gsi_start_phis (ex_bb);
1913 !gsi_end_p (gsi); )
5f40b3cb 1914 {
538dd0b7 1915 phi = gsi.phi ();
5f40b3cb 1916 res = PHI_RESULT (phi);
ea057359 1917 if (virtual_operand_p (res))
726a989a
RB
1918 {
1919 gsi_next (&gsi);
1920 continue;
1921 }
5f40b3cb 1922
a509ebb5 1923 /* Check if it is a part of reduction. If it is,
b8698a0f
L
1924 keep the phi at the reduction's keep_res field. The
1925 PHI_RESULT of this phi is the resulting value of the reduction
a509ebb5
RL
1926 variable when exiting the loop. */
1927
c203e8a7 1928 if (reduction_list->elements () > 0)
a509ebb5
RL
1929 {
1930 struct reduction_info *red;
1931
1932 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
a509ebb5
RL
1933 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1934 if (red)
726a989a
RB
1935 {
1936 red->keep_res = phi;
1937 gsi_next (&gsi);
1938 continue;
1939 }
a509ebb5 1940 }
726a989a
RB
1941 gcc_assert (control_name == NULL_TREE
1942 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
5f40b3cb 1943 control_name = res;
726a989a 1944 remove_phi_node (&gsi, false);
5f40b3cb
ZD
1945 }
1946 gcc_assert (control_name != NULL_TREE);
5f40b3cb 1947
b8698a0f 1948 /* Initialize the control variable to number of iterations
48710229 1949 according to the rhs of the exit condition. */
538dd0b7
DM
1950 gimple_stmt_iterator gsi = gsi_after_labels (ex_bb);
1951 cond_nit = as_a <gcond *> (last_stmt (exit->src));
48710229
RL
1952 nit_1 = gimple_cond_rhs (cond_nit);
1953 nit_1 = force_gimple_operand_gsi (&gsi,
1954 fold_convert (TREE_TYPE (control_name), nit_1),
726a989a 1955 false, NULL_TREE, false, GSI_SAME_STMT);
48710229 1956 stmt = gimple_build_assign (control_name, nit_1);
726a989a 1957 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1958}
1959
1960/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
726a989a 1961 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
5f40b3cb
ZD
1962 NEW_DATA is the variable that should be initialized from the argument
1963 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
726a989a 1964 basic block containing GIMPLE_OMP_PARALLEL tree. */
5f40b3cb
ZD
1965
1966static basic_block
1967create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
9ff70652 1968 tree new_data, unsigned n_threads, location_t loc)
5f40b3cb 1969{
726a989a 1970 gimple_stmt_iterator gsi;
5f40b3cb 1971 basic_block bb, paral_bb, for_bb, ex_bb;
0f900dfa 1972 tree t, param;
538dd0b7
DM
1973 gomp_parallel *omp_par_stmt;
1974 gimple omp_return_stmt1, omp_return_stmt2;
1975 gimple phi;
1976 gcond *cond_stmt;
1977 gomp_for *for_stmt;
1978 gomp_continue *omp_cont_stmt;
726a989a 1979 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
5f40b3cb
ZD
1980 edge exit, nexit, guard, end, e;
1981
726a989a 1982 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
5f40b3cb
ZD
1983 bb = loop_preheader_edge (loop)->src;
1984 paral_bb = single_pred (bb);
726a989a 1985 gsi = gsi_last_bb (paral_bb);
5f40b3cb 1986
9ff70652 1987 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
5f40b3cb 1988 OMP_CLAUSE_NUM_THREADS_EXPR (t)
a509ebb5 1989 = build_int_cst (integer_type_node, n_threads);
538dd0b7
DM
1990 omp_par_stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
1991 gimple_set_location (omp_par_stmt, loc);
5f40b3cb 1992
538dd0b7 1993 gsi_insert_after (&gsi, omp_par_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1994
1995 /* Initialize NEW_DATA. */
1996 if (data)
1997 {
538dd0b7
DM
1998 gassign *assign_stmt;
1999
726a989a
RB
2000 gsi = gsi_after_labels (bb);
2001
b731b390 2002 param = make_ssa_name (DECL_ARGUMENTS (loop_fn));
538dd0b7
DM
2003 assign_stmt = gimple_build_assign (param, build_fold_addr_expr (data));
2004 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
726a989a 2005
538dd0b7 2006 assign_stmt = gimple_build_assign (new_data,
726a989a 2007 fold_convert (TREE_TYPE (new_data), param));
538dd0b7 2008 gsi_insert_before (&gsi, assign_stmt, GSI_SAME_STMT);
5f40b3cb
ZD
2009 }
2010
726a989a 2011 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
5f40b3cb 2012 bb = split_loop_exit_edge (single_dom_exit (loop));
726a989a 2013 gsi = gsi_last_bb (bb);
538dd0b7
DM
2014 omp_return_stmt1 = gimple_build_omp_return (false);
2015 gimple_set_location (omp_return_stmt1, loc);
2016 gsi_insert_after (&gsi, omp_return_stmt1, GSI_NEW_STMT);
5f40b3cb 2017
726a989a 2018 /* Extract data for GIMPLE_OMP_FOR. */
5f40b3cb 2019 gcc_assert (loop->header == single_dom_exit (loop)->src);
538dd0b7 2020 cond_stmt = as_a <gcond *> (last_stmt (loop->header));
5f40b3cb 2021
726a989a 2022 cvar = gimple_cond_lhs (cond_stmt);
5f40b3cb
ZD
2023 cvar_base = SSA_NAME_VAR (cvar);
2024 phi = SSA_NAME_DEF_STMT (cvar);
2025 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
b731b390 2026 initvar = copy_ssa_name (cvar);
5f40b3cb
ZD
2027 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
2028 initvar);
2029 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
2030
1dff453d 2031 gsi = gsi_last_nondebug_bb (loop->latch);
726a989a
RB
2032 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
2033 gsi_remove (&gsi, true);
5f40b3cb
ZD
2034
2035 /* Prepare cfg. */
2036 for_bb = split_edge (loop_preheader_edge (loop));
2037 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
2038 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
2039 gcc_assert (exit == single_dom_exit (loop));
2040
2041 guard = make_edge (for_bb, ex_bb, 0);
2042 single_succ_edge (loop->latch)->flags = 0;
2043 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
538dd0b7
DM
2044 for (gphi_iterator gpi = gsi_start_phis (ex_bb);
2045 !gsi_end_p (gpi); gsi_next (&gpi))
5f40b3cb 2046 {
f5045c96
AM
2047 source_location locus;
2048 tree def;
538dd0b7
DM
2049 gphi *phi = gpi.phi ();
2050 gphi *stmt;
2051
2052 stmt = as_a <gphi *> (
2053 SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit)));
f5045c96
AM
2054
2055 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
b8698a0f 2056 locus = gimple_phi_arg_location_from_edge (stmt,
f5045c96 2057 loop_preheader_edge (loop));
9e227d60 2058 add_phi_arg (phi, def, guard, locus);
f5045c96
AM
2059
2060 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
2061 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
9e227d60 2062 add_phi_arg (phi, def, end, locus);
5f40b3cb
ZD
2063 }
2064 e = redirect_edge_and_branch (exit, nexit->dest);
2065 PENDING_STMT (e) = NULL;
2066
726a989a
RB
2067 /* Emit GIMPLE_OMP_FOR. */
2068 gimple_cond_set_lhs (cond_stmt, cvar_base);
5f40b3cb 2069 type = TREE_TYPE (cvar);
9ff70652 2070 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
5f40b3cb
ZD
2071 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
2072
74bf76ed 2073 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
9ff70652 2074 gimple_set_location (for_stmt, loc);
726a989a
RB
2075 gimple_omp_for_set_index (for_stmt, 0, initvar);
2076 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
2077 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
2078 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
2079 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
2080 cvar_base,
2081 build_int_cst (type, 1)));
2082
2083 gsi = gsi_last_bb (for_bb);
2084 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
5f40b3cb
ZD
2085 SSA_NAME_DEF_STMT (initvar) = for_stmt;
2086
726a989a
RB
2087 /* Emit GIMPLE_OMP_CONTINUE. */
2088 gsi = gsi_last_bb (loop->latch);
538dd0b7
DM
2089 omp_cont_stmt = gimple_build_omp_continue (cvar_next, cvar);
2090 gimple_set_location (omp_cont_stmt, loc);
2091 gsi_insert_after (&gsi, omp_cont_stmt, GSI_NEW_STMT);
2092 SSA_NAME_DEF_STMT (cvar_next) = omp_cont_stmt;
5f40b3cb 2093
726a989a
RB
2094 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
2095 gsi = gsi_last_bb (ex_bb);
538dd0b7
DM
2096 omp_return_stmt2 = gimple_build_omp_return (true);
2097 gimple_set_location (omp_return_stmt2, loc);
2098 gsi_insert_after (&gsi, omp_return_stmt2, GSI_NEW_STMT);
5f40b3cb 2099
cd7d9fd7
RG
2100 /* After the above dom info is hosed. Re-compute it. */
2101 free_dominance_info (CDI_DOMINATORS);
2102 calculate_dominance_info (CDI_DOMINATORS);
2103
5f40b3cb
ZD
2104 return paral_bb;
2105}
2106
08dab97a
RL
2107/* Generates code to execute the iterations of LOOP in N_THREADS
2108 threads in parallel.
2109
2110 NITER describes number of iterations of LOOP.
fa10beec 2111 REDUCTION_LIST describes the reductions existent in the LOOP. */
5f40b3cb
ZD
2112
2113static void
c203e8a7
TS
2114gen_parallel_loop (struct loop *loop,
2115 reduction_info_table_type *reduction_list,
a509ebb5 2116 unsigned n_threads, struct tree_niter_desc *niter)
5f40b3cb 2117{
5f40b3cb 2118 tree many_iterations_cond, type, nit;
726a989a
RB
2119 tree arg_struct, new_arg_struct;
2120 gimple_seq stmts;
9f9f72aa 2121 edge entry, exit;
a509ebb5 2122 struct clsn_data clsn_data;
5f40b3cb 2123 unsigned prob;
9ff70652
JJ
2124 location_t loc;
2125 gimple cond_stmt;
768da0da 2126 unsigned int m_p_thread=2;
5f40b3cb
ZD
2127
2128 /* From
2129
2130 ---------------------------------------------------------------------
2131 loop
2132 {
2133 IV = phi (INIT, IV + STEP)
2134 BODY1;
2135 if (COND)
2136 break;
2137 BODY2;
2138 }
2139 ---------------------------------------------------------------------
2140
2141 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
2142 we generate the following code:
2143
2144 ---------------------------------------------------------------------
2145
2146 if (MAY_BE_ZERO
a509ebb5
RL
2147 || NITER < MIN_PER_THREAD * N_THREADS)
2148 goto original;
5f40b3cb
ZD
2149
2150 BODY1;
2151 store all local loop-invariant variables used in body of the loop to DATA.
726a989a 2152 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
5f40b3cb 2153 load the variables from DATA.
726a989a 2154 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
5f40b3cb
ZD
2155 BODY2;
2156 BODY1;
726a989a
RB
2157 GIMPLE_OMP_CONTINUE;
2158 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
2159 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
5f40b3cb
ZD
2160 goto end;
2161
2162 original:
2163 loop
2164 {
2165 IV = phi (INIT, IV + STEP)
2166 BODY1;
2167 if (COND)
2168 break;
2169 BODY2;
2170 }
2171
2172 end:
2173
2174 */
2175
2176 /* Create two versions of the loop -- in the old one, we know that the
2177 number of iterations is large enough, and we will transform it into the
2178 loop that will be split to loop_fn, the new one will be used for the
2179 remaining iterations. */
a509ebb5 2180
768da0da
RL
2181 /* We should compute a better number-of-iterations value for outer loops.
2182 That is, if we have
2183
2184 for (i = 0; i < n; ++i)
2185 for (j = 0; j < m; ++j)
2186 ...
2187
2188 we should compute nit = n * m, not nit = n.
2189 Also may_be_zero handling would need to be adjusted. */
2190
5f40b3cb
ZD
2191 type = TREE_TYPE (niter->niter);
2192 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
2193 NULL_TREE);
2194 if (stmts)
726a989a 2195 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb 2196
768da0da
RL
2197 if (loop->inner)
2198 m_p_thread=2;
2199 else
2200 m_p_thread=MIN_PER_THREAD;
2201
2202 many_iterations_cond =
2203 fold_build2 (GE_EXPR, boolean_type_node,
2204 nit, build_int_cst (type, m_p_thread * n_threads));
2205
5f40b3cb 2206 many_iterations_cond
a509ebb5
RL
2207 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
2208 invert_truthvalue (unshare_expr (niter->may_be_zero)),
2209 many_iterations_cond);
5f40b3cb 2210 many_iterations_cond
a509ebb5 2211 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
5f40b3cb 2212 if (stmts)
726a989a 2213 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
2214 if (!is_gimple_condexpr (many_iterations_cond))
2215 {
2216 many_iterations_cond
a509ebb5
RL
2217 = force_gimple_operand (many_iterations_cond, &stmts,
2218 true, NULL_TREE);
5f40b3cb 2219 if (stmts)
726a989a 2220 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
5f40b3cb
ZD
2221 }
2222
2223 initialize_original_copy_tables ();
2224
2225 /* We assume that the loop usually iterates a lot. */
2226 prob = 4 * REG_BR_PROB_BASE / 5;
0f900dfa
JJ
2227 loop_version (loop, many_iterations_cond, NULL,
2228 prob, prob, REG_BR_PROB_BASE - prob, true);
5f40b3cb
ZD
2229 update_ssa (TODO_update_ssa);
2230 free_original_copy_tables ();
2231
2232 /* Base all the induction variables in LOOP on a single control one. */
c80a5403 2233 canonicalize_loop_ivs (loop, &nit, true);
5f40b3cb 2234
7c82d827
TV
2235 /* Ensure that the exit condition is the first statement in the loop.
2236 The common case is that latch of the loop is empty (apart from the
2237 increment) and immediately follows the loop exit test. Attempt to move the
2238 entry of the loop directly before the exit check and increase the number of
2239 iterations of the loop by one. */
2240 if (!try_transform_to_exit_first_loop_alt (loop, reduction_list, nit))
2241 {
2242 /* Fall back on the method that handles more cases, but duplicates the
2243 loop body: move the exit condition of LOOP to the beginning of its
2244 header, and duplicate the part of the last iteration that gets disabled
2245 to the exit of the loop. */
2246 transform_to_exit_first_loop (loop, reduction_list, nit);
2247 }
a509ebb5 2248
fa10beec 2249 /* Generate initializations for reductions. */
c203e8a7
TS
2250 if (reduction_list->elements () > 0)
2251 reduction_list->traverse <struct loop *, initialize_reductions> (loop);
5f40b3cb
ZD
2252
2253 /* Eliminate the references to local variables from the loop. */
9f9f72aa
AP
2254 gcc_assert (single_exit (loop));
2255 entry = loop_preheader_edge (loop);
2256 exit = single_dom_exit (loop);
5f40b3cb 2257
9f9f72aa 2258 eliminate_local_variables (entry, exit);
5f40b3cb
ZD
2259 /* In the old loop, move all variables non-local to the loop to a structure
2260 and back, and create separate decls for the variables used in loop. */
b8698a0f 2261 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
9f9f72aa 2262 &new_arg_struct, &clsn_data);
5f40b3cb
ZD
2263
2264 /* Create the parallel constructs. */
9ff70652
JJ
2265 loc = UNKNOWN_LOCATION;
2266 cond_stmt = last_stmt (loop->header);
2267 if (cond_stmt)
2268 loc = gimple_location (cond_stmt);
18751894
TV
2269 create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
2270 new_arg_struct, n_threads, loc);
c203e8a7 2271 if (reduction_list->elements () > 0)
a509ebb5 2272 create_call_for_reduction (loop, reduction_list, &clsn_data);
5f40b3cb
ZD
2273
2274 scev_reset ();
2275
2276 /* Cancel the loop (it is simpler to do it here rather than to teach the
2277 expander to do it). */
2278 cancel_loop_tree (loop);
2279
92a6bdbd
SP
2280 /* Free loop bound estimations that could contain references to
2281 removed statements. */
f0bd40b1 2282 FOR_EACH_LOOP (loop, 0)
92a6bdbd 2283 free_numbers_of_iterations_estimates_loop (loop);
5f40b3cb
ZD
2284}
2285
9857228c
SP
2286/* Returns true when LOOP contains vector phi nodes. */
2287
2288static bool
726a989a 2289loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
2290{
2291 unsigned i;
2292 basic_block *bbs = get_loop_body_in_dom_order (loop);
538dd0b7 2293 gphi_iterator gsi;
9857228c 2294 bool res = true;
9857228c
SP
2295
2296 for (i = 0; i < loop->num_nodes; i++)
726a989a 2297 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
538dd0b7 2298 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi.phi ()))) == VECTOR_TYPE)
9857228c
SP
2299 goto end;
2300
2301 res = false;
2302 end:
2303 free (bbs);
2304 return res;
2305}
2306
08dab97a
RL
2307/* Create a reduction_info struct, initialize it with REDUC_STMT
2308 and PHI, insert it to the REDUCTION_LIST. */
2309
2310static void
c203e8a7 2311build_new_reduction (reduction_info_table_type *reduction_list,
538dd0b7 2312 gimple reduc_stmt, gphi *phi)
08dab97a 2313{
4a8fb1a1 2314 reduction_info **slot;
08dab97a
RL
2315 struct reduction_info *new_reduction;
2316
2317 gcc_assert (reduc_stmt);
b8698a0f 2318
08dab97a
RL
2319 if (dump_file && (dump_flags & TDF_DETAILS))
2320 {
2321 fprintf (dump_file,
2322 "Detected reduction. reduction stmt is: \n");
2323 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
2324 fprintf (dump_file, "\n");
2325 }
b8698a0f 2326
08dab97a 2327 new_reduction = XCNEW (struct reduction_info);
b8698a0f 2328
08dab97a
RL
2329 new_reduction->reduc_stmt = reduc_stmt;
2330 new_reduction->reduc_phi = phi;
5d1fd1de 2331 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
08dab97a 2332 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
c203e8a7 2333 slot = reduction_list->find_slot (new_reduction, INSERT);
08dab97a
RL
2334 *slot = new_reduction;
2335}
2336
5d1fd1de
JJ
2337/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
2338
4a8fb1a1
LC
2339int
2340set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
5d1fd1de 2341{
4a8fb1a1 2342 struct reduction_info *const red = *slot;
5d1fd1de
JJ
2343 gimple_set_uid (red->reduc_phi, red->reduc_version);
2344 return 1;
2345}
2346
08dab97a
RL
2347/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
2348
2349static void
c203e8a7 2350gather_scalar_reductions (loop_p loop, reduction_info_table_type *reduction_list)
08dab97a 2351{
538dd0b7 2352 gphi_iterator gsi;
08dab97a
RL
2353 loop_vec_info simple_loop_info;
2354
08dab97a
RL
2355 simple_loop_info = vect_analyze_loop_form (loop);
2356
2357 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2358 {
538dd0b7 2359 gphi *phi = gsi.phi ();
08dab97a
RL
2360 affine_iv iv;
2361 tree res = PHI_RESULT (phi);
2362 bool double_reduc;
2363
ea057359 2364 if (virtual_operand_p (res))
08dab97a
RL
2365 continue;
2366
2367 if (!simple_iv (loop, loop, res, &iv, true)
2368 && simple_loop_info)
2369 {
8a9ecffd
MM
2370 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
2371 phi, true,
2372 &double_reduc);
48710229 2373 if (reduc_stmt && !double_reduc)
08dab97a
RL
2374 build_new_reduction (reduction_list, reduc_stmt, phi);
2375 }
2376 }
5d1fd1de
JJ
2377 destroy_loop_vec_info (simple_loop_info, true);
2378
2379 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2380 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2381 only now. */
c203e8a7 2382 reduction_list->traverse <void *, set_reduc_phi_uids> (NULL);
08dab97a
RL
2383}
2384
2385/* Try to initialize NITER for code generation part. */
2386
2387static bool
2388try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2389{
2390 edge exit = single_dom_exit (loop);
2391
2392 gcc_assert (exit);
2393
2394 /* We need to know # of iterations, and there should be no uses of values
2395 defined inside loop outside of it, unless the values are invariants of
2396 the loop. */
2397 if (!number_of_iterations_exit (loop, exit, niter, false))
2398 {
2399 if (dump_file && (dump_flags & TDF_DETAILS))
2400 fprintf (dump_file, " FAILED: number of iterations not known\n");
2401 return false;
2402 }
2403
2404 return true;
2405}
2406
2407/* Try to initialize REDUCTION_LIST for code generation part.
2408 REDUCTION_LIST describes the reductions. */
2409
2410static bool
4a8fb1a1 2411try_create_reduction_list (loop_p loop,
c203e8a7 2412 reduction_info_table_type *reduction_list)
08dab97a
RL
2413{
2414 edge exit = single_dom_exit (loop);
538dd0b7 2415 gphi_iterator gsi;
08dab97a
RL
2416
2417 gcc_assert (exit);
2418
2419 gather_scalar_reductions (loop, reduction_list);
2420
b8698a0f 2421
08dab97a
RL
2422 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2423 {
538dd0b7 2424 gphi *phi = gsi.phi ();
08dab97a
RL
2425 struct reduction_info *red;
2426 imm_use_iterator imm_iter;
2427 use_operand_p use_p;
2428 gimple reduc_phi;
2429 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2430
ea057359 2431 if (!virtual_operand_p (val))
08dab97a
RL
2432 {
2433 if (dump_file && (dump_flags & TDF_DETAILS))
2434 {
2435 fprintf (dump_file, "phi is ");
2436 print_gimple_stmt (dump_file, phi, 0, 0);
2437 fprintf (dump_file, "arg of phi to exit: value ");
2438 print_generic_expr (dump_file, val, 0);
2439 fprintf (dump_file, " used outside loop\n");
2440 fprintf (dump_file,
2441 " checking if it a part of reduction pattern: \n");
2442 }
c203e8a7 2443 if (reduction_list->elements () == 0)
08dab97a
RL
2444 {
2445 if (dump_file && (dump_flags & TDF_DETAILS))
2446 fprintf (dump_file,
2447 " FAILED: it is not a part of reduction.\n");
2448 return false;
2449 }
2450 reduc_phi = NULL;
2451 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2452 {
4942af9b
JJ
2453 if (!gimple_debug_bind_p (USE_STMT (use_p))
2454 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
08dab97a
RL
2455 {
2456 reduc_phi = USE_STMT (use_p);
2457 break;
2458 }
2459 }
2460 red = reduction_phi (reduction_list, reduc_phi);
2461 if (red == NULL)
2462 {
2463 if (dump_file && (dump_flags & TDF_DETAILS))
2464 fprintf (dump_file,
2465 " FAILED: it is not a part of reduction.\n");
2466 return false;
2467 }
2468 if (dump_file && (dump_flags & TDF_DETAILS))
2469 {
2470 fprintf (dump_file, "reduction phi is ");
2471 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2472 fprintf (dump_file, "reduction stmt is ");
2473 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2474 }
2475 }
2476 }
2477
2478 /* The iterations of the loop may communicate only through bivs whose
2479 iteration space can be distributed efficiently. */
2480 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2481 {
538dd0b7 2482 gphi *phi = gsi.phi ();
08dab97a
RL
2483 tree def = PHI_RESULT (phi);
2484 affine_iv iv;
2485
ea057359 2486 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
08dab97a
RL
2487 {
2488 struct reduction_info *red;
2489
2490 red = reduction_phi (reduction_list, phi);
2491 if (red == NULL)
2492 {
2493 if (dump_file && (dump_flags & TDF_DETAILS))
2494 fprintf (dump_file,
2495 " FAILED: scalar dependency between iterations\n");
2496 return false;
2497 }
2498 }
2499 }
2500
2501
2502 return true;
2503}
2504
5f40b3cb
ZD
2505/* Detect parallel loops and generate parallel code using libgomp
2506 primitives. Returns true if some loop was parallelized, false
2507 otherwise. */
2508
09489eb8 2509static bool
5f40b3cb
ZD
2510parallelize_loops (void)
2511{
2512 unsigned n_threads = flag_tree_parallelize_loops;
2513 bool changed = false;
2514 struct loop *loop;
2515 struct tree_niter_desc niter_desc;
f873b205 2516 struct obstack parloop_obstack;
8adfe01d 2517 HOST_WIDE_INT estimated;
b05e0233 2518 source_location loop_loc;
f873b205 2519
5f40b3cb
ZD
2520 /* Do not parallelize loops in the functions created by parallelization. */
2521 if (parallelized_function_p (cfun->decl))
2522 return false;
8adfe01d
RL
2523 if (cfun->has_nonlocal_label)
2524 return false;
5f40b3cb 2525
f873b205 2526 gcc_obstack_init (&parloop_obstack);
c203e8a7 2527 reduction_info_table_type reduction_list (10);
726a989a 2528 init_stmt_vec_info_vec ();
a509ebb5 2529
f0bd40b1 2530 FOR_EACH_LOOP (loop, 0)
5f40b3cb 2531 {
4a8fb1a1 2532 reduction_list.empty ();
48710229
RL
2533 if (dump_file && (dump_flags & TDF_DETAILS))
2534 {
2535 fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
2536 if (loop->inner)
2537 fprintf (dump_file, "loop %d is not innermost\n",loop->num);
2538 else
2539 fprintf (dump_file, "loop %d is innermost\n",loop->num);
2540 }
b8698a0f 2541
48710229 2542 /* If we use autopar in graphite pass, we use its marked dependency
87d4d0ee
SP
2543 checking results. */
2544 if (flag_loop_parallelize_all && !loop->can_be_parallel)
48710229
RL
2545 {
2546 if (dump_file && (dump_flags & TDF_DETAILS))
2547 fprintf (dump_file, "loop is not parallel according to graphite\n");
87d4d0ee 2548 continue;
48710229 2549 }
87d4d0ee 2550
48710229
RL
2551 if (!single_dom_exit (loop))
2552 {
b8698a0f 2553
48710229
RL
2554 if (dump_file && (dump_flags & TDF_DETAILS))
2555 fprintf (dump_file, "loop is !single_dom_exit\n");
b8698a0f 2556
08dab97a 2557 continue;
48710229 2558 }
08dab97a
RL
2559
2560 if (/* And of course, the loop must be parallelizable. */
2561 !can_duplicate_loop_p (loop)
1d4af1e8 2562 || loop_has_blocks_with_irreducible_flag (loop)
8adfe01d 2563 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
9857228c 2564 /* FIXME: the check for vector phi nodes could be removed. */
69958396 2565 || loop_has_vector_phi_nodes (loop))
08dab97a 2566 continue;
e5b332cd 2567
652c4c71 2568 estimated = estimated_stmt_executions_int (loop);
e5b332cd
RG
2569 if (estimated == -1)
2570 estimated = max_stmt_executions_int (loop);
87d4d0ee 2571 /* FIXME: Bypass this check as graphite doesn't update the
e5b332cd 2572 count and frequency correctly now. */
87d4d0ee 2573 if (!flag_loop_parallelize_all
e5b332cd
RG
2574 && ((estimated != -1
2575 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
87d4d0ee
SP
2576 /* Do not bother with loops in cold areas. */
2577 || optimize_loop_nest_for_size_p (loop)))
08dab97a 2578 continue;
b8698a0f 2579
08dab97a
RL
2580 if (!try_get_loop_niter (loop, &niter_desc))
2581 continue;
2582
c203e8a7 2583 if (!try_create_reduction_list (loop, &reduction_list))
08dab97a
RL
2584 continue;
2585
f873b205
LB
2586 if (!flag_loop_parallelize_all
2587 && !loop_parallel_p (loop, &parloop_obstack))
5f40b3cb
ZD
2588 continue;
2589
2590 changed = true;
48710229
RL
2591 if (dump_file && (dump_flags & TDF_DETAILS))
2592 {
48710229 2593 if (loop->inner)
8adfe01d 2594 fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
48710229 2595 else
8adfe01d
RL
2596 fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
2597 loop_loc = find_loop_location (loop);
b05e0233 2598 if (loop_loc != UNKNOWN_LOCATION)
8adfe01d 2599 fprintf (dump_file, "\nloop at %s:%d: ",
b05e0233 2600 LOCATION_FILE (loop_loc), LOCATION_LINE (loop_loc));
b8698a0f 2601 }
c203e8a7 2602 gen_parallel_loop (loop, &reduction_list,
08dab97a 2603 n_threads, &niter_desc);
5f40b3cb
ZD
2604 }
2605
726a989a 2606 free_stmt_vec_info_vec ();
f873b205 2607 obstack_free (&parloop_obstack, NULL);
6b8ed145
RG
2608
2609 /* Parallelization will cause new function calls to be inserted through
d086d311
RG
2610 which local variables will escape. Reset the points-to solution
2611 for ESCAPED. */
6b8ed145 2612 if (changed)
d086d311 2613 pt_solution_reset (&cfun->gimple_df->escaped);
6b8ed145 2614
5f40b3cb
ZD
2615 return changed;
2616}
2617
c1bf2a39
AM
2618/* Parallelization. */
2619
c1bf2a39
AM
2620namespace {
2621
2622const pass_data pass_data_parallelize_loops =
2623{
2624 GIMPLE_PASS, /* type */
2625 "parloops", /* name */
2626 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
2627 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2628 ( PROP_cfg | PROP_ssa ), /* properties_required */
2629 0, /* properties_provided */
2630 0, /* properties_destroyed */
2631 0, /* todo_flags_start */
3bea341f 2632 0, /* todo_flags_finish */
c1bf2a39
AM
2633};
2634
2635class pass_parallelize_loops : public gimple_opt_pass
2636{
2637public:
2638 pass_parallelize_loops (gcc::context *ctxt)
2639 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2640 {}
2641
2642 /* opt_pass methods: */
1a3d085c 2643 virtual bool gate (function *) { return flag_tree_parallelize_loops > 1; }
be55bfe6 2644 virtual unsigned int execute (function *);
c1bf2a39
AM
2645
2646}; // class pass_parallelize_loops
2647
be55bfe6
TS
2648unsigned
2649pass_parallelize_loops::execute (function *fun)
2650{
2651 if (number_of_loops (fun) <= 1)
2652 return 0;
2653
2654 if (parallelize_loops ())
18751894
TV
2655 {
2656 fun->curr_properties &= ~(PROP_gimple_eomp);
2657 return TODO_update_ssa;
2658 }
2659
be55bfe6
TS
2660 return 0;
2661}
2662
c1bf2a39
AM
2663} // anon namespace
2664
2665gimple_opt_pass *
2666make_pass_parallelize_loops (gcc::context *ctxt)
2667{
2668 return new pass_parallelize_loops (ctxt);
2669}