28c92cbb 1/* Loop autoparallelization.
711789cc 2 Copyright (C) 2006-2013 Free Software Foundation, Inc.
0773b627 3 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
4 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
28c92cbb 5
6This file is part of GCC.
7
8GCC is free software; you can redistribute it and/or modify it under
9the terms of the GNU General Public License as published by the Free
4ef8346d 10Software Foundation; either version 3, or (at your option) any later
28c92cbb 11version.
12
13GCC is distributed in the hope that it will be useful, but WITHOUT ANY
14WARRANTY; without even the implied warranty of MERCHANTABILITY or
15FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16for more details.
17
18You should have received a copy of the GNU General Public License
4ef8346d 19along with GCC; see the file COPYING3. If not see
20<http://www.gnu.org/licenses/>. */
28c92cbb 21
22#include "config.h"
23#include "system.h"
24#include "coretypes.h"
69ee5dbb 25#include "tree-ssa.h"
28c92cbb 26#include "cfgloop.h"
28c92cbb 27#include "tree-data-ref.h"
1e5b7b1f 28#include "tree-scalar-evolution.h"
ce084dfc 29#include "gimple-pretty-print.h"
28c92cbb 30#include "tree-pass.h"
28c92cbb 31#include "langhooks.h"
cb7f680b 32#include "tree-vectorizer.h"
d9dd21a8 33#include "tree-hasher.h"
64641360 34#include "tree-parloops.h"
7740abd8 35#include "omp-low.h"
28c92cbb 36
37/* This pass tries to distribute iterations of loops into several threads.
38 The implementation is straightforward -- for each loop we test whether its
39 iterations are independent, and if it is the case (and some additional
40 conditions regarding profitability and correctness are satisfied), we
75a70cf9 41 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
42 machinery do its job.
48e1416a 43
28c92cbb 44 Most of the complexity is in bringing the code into the shape expected
45 by the omp expanders:
75a70cf9 46 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
47 variable and that the exit test is at the start of the loop body
48 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
28c92cbb 49 variables by accesses through pointers, and breaking up ssa chains
50 by storing the values incoming to the parallelized loop to a structure
51 passed to the new function as an argument (something similar is done
52 in omp gimplification, unfortunately only a small part of the code
53 can be shared).
54
55 TODO:
56 -- if there are several parallelizable loops in a function, it may be
57 possible to generate the threads just once (using synchronization to
58 ensure that cross-loop dependences are obeyed).
0773b627 59 -- handling of common reduction patterns for outer loops.
60
61 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
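
/* As an illustrative sketch (not the literal GIMPLE produced by this pass,
   and with made-up arrays a, b, c and bound n), a loop whose iterations are
   independent, such as

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   is conceptually rewritten into

     #pragma omp parallel num_threads(N)
       #pragma omp for schedule(static)
         for (i = 0; i < n; i++)
           a[i] = b[i] + c[i];

   The body is then outlined into a new function by the omp expansion
   machinery, and the values live on entry to the loop (here the addresses of
   a, b, c and the bound n) are passed to it through the .paral_data
   structure built by separate_decls_in_region below.  */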
48e1416a 62/*
cb7f680b 63 Reduction handling:
f4a50267 64 currently we use vect_force_simple_reduction() to detect reduction patterns.
cb7f680b 65 The code transformation will be introduced by an example.
48e1416a 66
67
cb7f680b 68parloop
69{
70 int sum=1;
71
848674d0 72 for (i = 0; i < N; i++)
cb7f680b 73 {
74 x[i] = i + 3;
75 sum+=x[i];
76 }
77}
78
848674d0 79gimple-like code:
cb7f680b 80header_bb:
81
848674d0 82 # sum_29 = PHI <sum_11(5), 1(3)>
83 # i_28 = PHI <i_12(5), 0(3)>
84 D.1795_8 = i_28 + 3;
85 x[i_28] = D.1795_8;
86 sum_11 = D.1795_8 + sum_29;
87 i_12 = i_28 + 1;
88 if (N_6(D) > i_12)
89 goto header_bb;
90
cb7f680b 91
92exit_bb:
93
848674d0 94 # sum_21 = PHI <sum_11(4)>
95 printf (&"%d"[0], sum_21);
cb7f680b 96
97
98after reduction transformation (only relevant parts):
99
100parloop
101{
102
103....
104
848674d0 105
f0b5f617 106 # Storing the initial value given by the user. #
848674d0 107
5bb62c99 108 .paral_data_store.32.sum.27 = 1;
48e1416a 109
110 #pragma omp parallel num_threads(4)
cb7f680b 111
848674d0 112 #pragma omp for schedule(static)
5bb62c99 113
114 # The neutral element corresponding to the particular
115 reduction's operation, e.g. 0 for PLUS_EXPR,
116 1 for MULT_EXPR, etc. replaces the user's initial value. #
117
118 # sum.27_29 = PHI <sum.27_11, 0>
119
848674d0 120 sum.27_11 = D.1827_8 + sum.27_29;
5bb62c99 121
75a70cf9 122 GIMPLE_OMP_CONTINUE
cb7f680b 123
848674d0 124 # Adding this reduction phi is done at create_phi_for_local_result() #
125 # sum.27_56 = PHI <sum.27_11, 0>
75a70cf9 126 GIMPLE_OMP_RETURN
48e1416a 127
128 # Creating the atomic operation is done at
848674d0 129 create_call_for_reduction_1() #
cb7f680b 130
848674d0 131 #pragma omp atomic_load
132 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
133 D.1840_60 = sum.27_56 + D.1839_59;
134 #pragma omp atomic_store (D.1840_60);
48e1416a 135
75a70cf9 136 GIMPLE_OMP_RETURN
48e1416a 137
848674d0 138 # collecting the result after the join of the threads is done at
139 create_loads_for_reductions().
5bb62c99 140 The value computed by the threads is loaded from the
141 shared struct. #
142
48e1416a 143
848674d0 144 .paral_data_load.33_52 = &.paral_data_store.32;
5bb62c99 145 sum_37 = .paral_data_load.33_52->sum.27;
848674d0 146 sum_43 = D.1795_41 + sum_37;
147
148 exit bb:
149 # sum_21 = PHI <sum_43, sum_26>
150 printf (&"%d"[0], sum_21);
151
152...
153
cb7f680b 154}
155
156*/
157
28c92cbb 158/* Minimal number of iterations of a loop that should be executed in each
159 thread. */
160#define MIN_PER_THREAD 100
161
48e1416a 162/* Element of the hashtable, representing a
cb7f680b 163 reduction in the current loop. */
164struct reduction_info
165{
75a70cf9 166 gimple reduc_stmt; /* reduction statement. */
167 gimple reduc_phi; /* The phi node defining the reduction. */
168 enum tree_code reduction_code;/* code for the reduction operation. */
71fa519d 169 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
170 result. */
48e1416a 171 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
cb7f680b 172 of the reduction variable when exiting the loop. */
5bb62c99 173 tree initial_value; /* The initial value of the reduction var before entering the loop. */
cb7f680b 174 tree field; /* the name of the field in the parloop data structure intended for reduction. */
cb7f680b 175 tree init; /* reduction initialization value. */
48e1416a 176 gimple new_phi; /* (helper field) Newly created phi node whose result
cb7f680b 177 will be passed to the atomic operation. Represents
178 the local result each thread computed for the reduction
179 operation. */
180};
181
d9dd21a8 182/* Reduction info hashtable helpers. */
cb7f680b 183
d9dd21a8 184struct reduction_hasher : typed_free_remove <reduction_info>
cb7f680b 185{
d9dd21a8 186 typedef reduction_info value_type;
187 typedef reduction_info compare_type;
188 static inline hashval_t hash (const value_type *);
189 static inline bool equal (const value_type *, const compare_type *);
190};
191
192/* Equality and hash functions for hashtab code. */
cb7f680b 193
d9dd21a8 194inline bool
195reduction_hasher::equal (const value_type *a, const compare_type *b)
196{
cb7f680b 197 return (a->reduc_phi == b->reduc_phi);
198}
199
d9dd21a8 200inline hashval_t
201reduction_hasher::hash (const value_type *a)
cb7f680b 202{
71fa519d 203 return a->reduc_version;
cb7f680b 204}
205
d9dd21a8 206typedef hash_table <reduction_hasher> reduction_info_table_type;
207
208
cb7f680b 209static struct reduction_info *
d9dd21a8 210reduction_phi (reduction_info_table_type reduction_list, gimple phi)
cb7f680b 211{
212 struct reduction_info tmpred, *red;
213
d9dd21a8 214 if (reduction_list.elements () == 0 || phi == NULL)
cb7f680b 215 return NULL;
216
217 tmpred.reduc_phi = phi;
71fa519d 218 tmpred.reduc_version = gimple_uid (phi);
d9dd21a8 219 red = reduction_list.find (&tmpred);
cb7f680b 220
221 return red;
222}
223
28c92cbb 224/* Element of hashtable of names to copy. */
225
226struct name_to_copy_elt
227{
228 unsigned version; /* The version of the name to copy. */
229 tree new_name; /* The new name used in the copy. */
230 tree field; /* The field of the structure used to pass the
231 value. */
232};
233
d9dd21a8 234/* Name copies hashtable helpers. */
28c92cbb 235
d9dd21a8 236struct name_to_copy_hasher : typed_free_remove <name_to_copy_elt>
28c92cbb 237{
d9dd21a8 238 typedef name_to_copy_elt value_type;
239 typedef name_to_copy_elt compare_type;
240 static inline hashval_t hash (const value_type *);
241 static inline bool equal (const value_type *, const compare_type *);
242};
243
244/* Equality and hash functions for hashtab code. */
28c92cbb 245
d9dd21a8 246inline bool
247name_to_copy_hasher::equal (const value_type *a, const compare_type *b)
248{
28c92cbb 249 return a->version == b->version;
250}
251
d9dd21a8 252inline hashval_t
253name_to_copy_hasher::hash (const value_type *a)
28c92cbb 254{
28c92cbb 255 return (hashval_t) a->version;
256}
257
d9dd21a8 258typedef hash_table <name_to_copy_hasher> name_to_copy_table_type;
259
e01f9f1f 260/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
261 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
262 represents the denominator for every element in the matrix. */
263typedef struct lambda_trans_matrix_s
264{
265 lambda_matrix matrix;
266 int rowsize;
267 int colsize;
268 int denominator;
269} *lambda_trans_matrix;
270#define LTM_MATRIX(T) ((T)->matrix)
271#define LTM_ROWSIZE(T) ((T)->rowsize)
272#define LTM_COLSIZE(T) ((T)->colsize)
273#define LTM_DENOMINATOR(T) ((T)->denominator)
274
275/* Allocate a new transformation matrix. */
276
277static lambda_trans_matrix
278lambda_trans_matrix_new (int colsize, int rowsize,
279 struct obstack * lambda_obstack)
280{
281 lambda_trans_matrix ret;
282
283 ret = (lambda_trans_matrix)
284 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
285 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
286 LTM_ROWSIZE (ret) = rowsize;
287 LTM_COLSIZE (ret) = colsize;
288 LTM_DENOMINATOR (ret) = 1;
289 return ret;
290}
291
292/* Multiply a vector VEC by a matrix MAT.
293 MAT is an M*N matrix, and VEC is a vector with length N. The result
294 is stored in DEST which must be a vector of length M. */
295
296static void
297lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
298 lambda_vector vec, lambda_vector dest)
299{
300 int i, j;
301
302 lambda_vector_clear (dest, m);
303 for (i = 0; i < m; i++)
304 for (j = 0; j < n; j++)
305 dest[i] += matrix[i][j] * vec[j];
306}
307
308/* Return true if TRANS is a legal transformation matrix that respects
309 the dependence vectors in DISTS and DIRS. The conservative answer
310 is false.
311
312 "Wolfe proves that a unimodular transformation represented by the
313 matrix T is legal when applied to a loop nest with a set of
314 lexicographically non-negative distance vectors RDG if and only if
315 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
316 i.e.: if and only if it transforms the lexicographically positive
317 distance vectors to lexicographically positive vectors. Note that
318 a unimodular matrix must transform the zero vector (and only it) to
319 the zero vector." S.Muchnick. */
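
/* A small worked example of how this test is used by loop_parallel_p below:
   it builds the 1x1 reversal matrix T = [-1] and asks whether reversing the
   single loop would be legal.  For an (illustrative) loop carrying the
   dependence

     s[i] = s[i - 1] + ...;     i.e. distance vector d = (1)

   we get T.d = (-1), which is not lexicographically positive, so the
   reversal -- and hence the parallelization -- is rejected.  If every
   distance vector is mapped to a lexicographically positive vector (or
   there are no loop-carried dependences at all), the iterations may be
   reordered or executed in parallel.  */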
320
321static bool
322lambda_transform_legal_p (lambda_trans_matrix trans,
323 int nb_loops,
f1f41a6c 324 vec<ddr_p> dependence_relations)
e01f9f1f 325{
326 unsigned int i, j;
327 lambda_vector distres;
328 struct data_dependence_relation *ddr;
329
330 gcc_assert (LTM_COLSIZE (trans) == nb_loops
331 && LTM_ROWSIZE (trans) == nb_loops);
332
333 /* When there are no dependences, the transformation is correct. */
f1f41a6c 334 if (dependence_relations.length () == 0)
e01f9f1f 335 return true;
336
f1f41a6c 337 ddr = dependence_relations[0];
e01f9f1f 338 if (ddr == NULL)
339 return true;
340
341 /* When there is an unknown relation in the dependence_relations, we
 342 know that it is not worth looking at this loop nest: give up. */
343 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
344 return false;
345
346 distres = lambda_vector_new (nb_loops);
347
348 /* For each distance vector in the dependence graph. */
f1f41a6c 349 FOR_EACH_VEC_ELT (dependence_relations, i, ddr)
e01f9f1f 350 {
351 /* Don't care about relations for which we know that there is no
 352 dependence, nor about read-read (aka. input-dependences):
353 these data accesses can happen in any order. */
354 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
355 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
356 continue;
357
358 /* Conservatively answer: "this transformation is not valid". */
359 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
360 return false;
361
362 /* If the dependence could not be captured by a distance vector,
363 conservatively answer that the transform is not valid. */
364 if (DDR_NUM_DIST_VECTS (ddr) == 0)
365 return false;
366
367 /* Compute trans.dist_vect */
368 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
369 {
370 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
371 DDR_DIST_VECT (ddr, j), distres);
372
373 if (!lambda_vector_lexico_pos (distres, nb_loops))
374 return false;
375 }
376 }
377 return true;
378}
5fa90eea 379
380/* Data dependency analysis. Returns true if the iterations of LOOP
381 are independent on each other (that is, if we can execute them
382 in parallel). */
28c92cbb 383
384static bool
1e33ad50 385loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
28c92cbb 386{
f1f41a6c 387 vec<loop_p> loop_nest;
388 vec<ddr_p> dependence_relations;
389 vec<data_reference_p> datarefs;
28c92cbb 390 lambda_trans_matrix trans;
391 bool ret = false;
28c92cbb 392
393 if (dump_file && (dump_flags & TDF_DETAILS))
b0fb253a 394 {
395 fprintf (dump_file, "Considering loop %d\n", loop->num);
396 if (!loop->inner)
397 fprintf (dump_file, "loop is innermost\n");
48e1416a 398 else
b0fb253a 399 fprintf (dump_file, "loop NOT innermost\n");
400 }
28c92cbb 401
28c92cbb 402 /* Check for problems with dependences. If the loop can be reversed,
403 the iterations are independent. */
f1f41a6c 404 datarefs.create (10);
405 dependence_relations.create (10 * 10);
406 loop_nest.create (3);
713f1f14 407 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
408 &dependence_relations))
409 {
410 if (dump_file && (dump_flags & TDF_DETAILS))
411 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
412 ret = false;
413 goto end;
414 }
28c92cbb 415 if (dump_file && (dump_flags & TDF_DETAILS))
416 dump_data_dependence_relations (dump_file, dependence_relations);
417
1e33ad50 418 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
28c92cbb 419 LTM_MATRIX (trans)[0][0] = -1;
420
421 if (lambda_transform_legal_p (trans, 1, dependence_relations))
422 {
423 ret = true;
424 if (dump_file && (dump_flags & TDF_DETAILS))
425 fprintf (dump_file, " SUCCESS: may be parallelized\n");
426 }
427 else if (dump_file && (dump_flags & TDF_DETAILS))
cb7f680b 428 fprintf (dump_file,
429 " FAILED: data dependencies exist across iterations\n");
28c92cbb 430
713f1f14 431 end:
f1f41a6c 432 loop_nest.release ();
28c92cbb 433 free_dependence_relations (dependence_relations);
434 free_data_refs (datarefs);
435
436 return ret;
437}
438
d4fcfd16 439/* Return true when LOOP contains basic blocks marked with the
440 BB_IRREDUCIBLE_LOOP flag. */
441
442static inline bool
443loop_has_blocks_with_irreducible_flag (struct loop *loop)
444{
445 unsigned i;
446 basic_block *bbs = get_loop_body_in_dom_order (loop);
447 bool res = true;
448
449 for (i = 0; i < loop->num_nodes; i++)
450 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
451 goto end;
452
453 res = false;
454 end:
455 free (bbs);
456 return res;
457}
458
c1fb5b25 459/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
e06f9c34 460 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
c1fb5b25 461 to their addresses that can be reused. The address of OBJ is known to
ad57283e 462 be invariant in the whole function. Other needed statements are placed
463 right before GSI. */
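
/* For illustration (the names here are made up): if OBJ is the access
   a[i_5] of a local array "a", the access is first canonicalized to
   (*&a)[i_5]; the address &a is then assigned once to an SSA name on the
   ENTRY edge, recorded in DECL_ADDRESS, and that name is reused for every
   later access or address based on "a".  */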
28c92cbb 464
465static tree
d9dd21a8 466take_address_of (tree obj, tree type, edge entry,
467 int_tree_htab_type decl_address, gimple_stmt_iterator *gsi)
28c92cbb 468{
c1fb5b25 469 int uid;
d9dd21a8 470 int_tree_map **dslot;
28c92cbb 471 struct int_tree_map ielt, *nielt;
03d37e4e 472 tree *var_p, name, addr;
75a70cf9 473 gimple stmt;
474 gimple_seq stmts;
28c92cbb 475
c1fb5b25 476 /* Since the address of OBJ is invariant, the trees may be shared.
477 Avoid rewriting unrelated parts of the code. */
478 obj = unshare_expr (obj);
479 for (var_p = &obj;
480 handled_component_p (*var_p);
481 var_p = &TREE_OPERAND (*var_p, 0))
482 continue;
c1fb5b25 483
64ade643 484 /* Canonicalize the access to base on a MEM_REF. */
485 if (DECL_P (*var_p))
486 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
487
488 /* Assign a canonical SSA name to the address of the base decl used
489 in the address and share it for all accesses and addresses based
490 on it. */
491 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
28c92cbb 492 ielt.uid = uid;
d9dd21a8 493 dslot = decl_address.find_slot_with_hash (&ielt, uid, INSERT);
28c92cbb 494 if (!*dslot)
495 {
ad57283e 496 if (gsi == NULL)
497 return NULL;
64ade643 498 addr = TREE_OPERAND (*var_p, 0);
a7c17572 499 const char *obj_name
500 = get_name (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
501 if (obj_name)
502 name = make_temp_ssa_name (TREE_TYPE (addr), NULL, obj_name);
503 else
504 name = make_ssa_name (TREE_TYPE (addr), NULL);
03d37e4e 505 stmt = gimple_build_assign (name, addr);
75a70cf9 506 gsi_insert_on_edge_immediate (entry, stmt);
28c92cbb 507
508 nielt = XNEW (struct int_tree_map);
509 nielt->uid = uid;
510 nielt->to = name;
511 *dslot = nielt;
28c92cbb 512 }
c1fb5b25 513 else
d9dd21a8 514 name = (*dslot)->to;
28c92cbb 515
64ade643 516 /* Express the address in terms of the canonical SSA name. */
517 TREE_OPERAND (*var_p, 0) = name;
ad57283e 518 if (gsi == NULL)
519 return build_fold_addr_expr_with_type (obj, type);
520
64ade643 521 name = force_gimple_operand (build_addr (obj, current_function_decl),
522 &stmts, true, NULL_TREE);
523 if (!gimple_seq_empty_p (stmts))
ad57283e 524 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
28c92cbb 525
64ade643 526 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
c1fb5b25 527 {
75a70cf9 528 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
c1fb5b25 529 NULL_TREE);
75a70cf9 530 if (!gimple_seq_empty_p (stmts))
ad57283e 531 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
c1fb5b25 532 }
28c92cbb 533
534 return name;
535}
536
cb7f680b 537/* Callback for htab_traverse. Create the initialization statement
48e1416a 538 for the reduction described in SLOT, and place it at the preheader of
cb7f680b 539 the loop described in DATA. */
540
d9dd21a8 541int
542initialize_reductions (reduction_info **slot, struct loop *loop)
cb7f680b 543{
cb7f680b 544 tree init, c;
cb7f680b 545 tree bvar, type, arg;
546 edge e;
547
d9dd21a8 548 struct reduction_info *const reduc = *slot;
cb7f680b 549
48e1416a 550 /* Create initialization in preheader:
cb7f680b 551 reduction_variable = initialization value of reduction. */
552
48e1416a 553 /* In the phi node at the header, replace the argument coming
cb7f680b 554 from the preheader with the reduction initialization value. */
555
556 /* Create a new variable to initialize the reduction. */
557 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
558 bvar = create_tmp_var (type, "reduction");
cb7f680b 559
e60a6f7b 560 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
561 OMP_CLAUSE_REDUCTION);
cb7f680b 562 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
75a70cf9 563 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
cb7f680b 564
565 init = omp_reduction_init (c, TREE_TYPE (bvar));
566 reduc->init = init;
567
48e1416a 568 /* Replace the argument representing the initialization value
569 with the initialization value for the reduction (neutral
570 element for the particular operation, e.g. 0 for PLUS_EXPR,
571 1 for MULT_EXPR, etc).
572 Keep the old value in a new variable "reduction_initial",
 573 that will be taken into consideration after the parallel
848674d0 574 computing is done. */
cb7f680b 575
576 e = loop_preheader_edge (loop);
577 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
578 /* Create new variable to hold the initial value. */
cb7f680b 579
cb7f680b 580 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
848674d0 581 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
5bb62c99 582 reduc->initial_value = arg;
cb7f680b 583 return 1;
584}
28c92cbb 585
586struct elv_data
587{
75a70cf9 588 struct walk_stmt_info info;
e06f9c34 589 edge entry;
d9dd21a8 590 int_tree_htab_type decl_address;
ad57283e 591 gimple_stmt_iterator *gsi;
28c92cbb 592 bool changed;
ad57283e 593 bool reset;
28c92cbb 594};
595
e06f9c34 596/* Eliminates references to local variables in *TP out of the single
597 entry single exit region starting at DTA->ENTRY.
598 DECL_ADDRESS contains addresses of the references that had their
599 address taken already. If the expression is changed, CHANGED is
600 set to true. Callback for walk_tree. */
cb7f680b 601
28c92cbb 602static tree
c1fb5b25 603eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
28c92cbb 604{
45ba1503 605 struct elv_data *const dta = (struct elv_data *) data;
c1fb5b25 606 tree t = *tp, var, addr, addr_type, type, obj;
28c92cbb 607
608 if (DECL_P (t))
609 {
610 *walk_subtrees = 0;
611
612 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
613 return NULL_TREE;
614
615 type = TREE_TYPE (t);
616 addr_type = build_pointer_type (type);
ad57283e 617 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
618 dta->gsi);
619 if (dta->gsi == NULL && addr == NULL_TREE)
620 {
621 dta->reset = true;
622 return NULL_TREE;
623 }
624
182cf5a9 625 *tp = build_simple_mem_ref (addr);
28c92cbb 626
627 dta->changed = true;
628 return NULL_TREE;
629 }
630
631 if (TREE_CODE (t) == ADDR_EXPR)
632 {
c1fb5b25 633 /* ADDR_EXPR may appear in two contexts:
634 -- as a gimple operand, when the address taken is a function invariant
 635 -- as gimple rhs, when the resulting address is not a function
636 invariant
637 We do not need to do anything special in the latter case (the base of
638 the memory reference whose address is taken may be replaced in the
639 DECL_P case). The former case is more complicated, as we need to
640 ensure that the new address is still a gimple operand. Thus, it
641 is not sufficient to replace just the base of the memory reference --
642 we need to move the whole computation of the address out of the
643 loop. */
644 if (!is_gimple_val (t))
28c92cbb 645 return NULL_TREE;
646
647 *walk_subtrees = 0;
c1fb5b25 648 obj = TREE_OPERAND (t, 0);
649 var = get_base_address (obj);
650 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
28c92cbb 651 return NULL_TREE;
652
653 addr_type = TREE_TYPE (t);
ad57283e 654 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
655 dta->gsi);
656 if (dta->gsi == NULL && addr == NULL_TREE)
657 {
658 dta->reset = true;
659 return NULL_TREE;
660 }
28c92cbb 661 *tp = addr;
662
663 dta->changed = true;
664 return NULL_TREE;
665 }
666
75a70cf9 667 if (!EXPR_P (t))
28c92cbb 668 *walk_subtrees = 0;
669
670 return NULL_TREE;
671}
672
ad57283e 673/* Moves the references to local variables in STMT at *GSI out of the single
e06f9c34 674 entry single exit region starting at ENTRY. DECL_ADDRESS contains
675 addresses of the references that had their address taken
676 already. */
28c92cbb 677
678static void
ad57283e 679eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
d9dd21a8 680 int_tree_htab_type decl_address)
28c92cbb 681{
682 struct elv_data dta;
ad57283e 683 gimple stmt = gsi_stmt (*gsi);
28c92cbb 684
75a70cf9 685 memset (&dta.info, '\0', sizeof (dta.info));
e06f9c34 686 dta.entry = entry;
28c92cbb 687 dta.decl_address = decl_address;
688 dta.changed = false;
ad57283e 689 dta.reset = false;
28c92cbb 690
9845d120 691 if (gimple_debug_bind_p (stmt))
ad57283e 692 {
693 dta.gsi = NULL;
694 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
695 eliminate_local_variables_1, &dta.info, NULL);
696 if (dta.reset)
697 {
698 gimple_debug_bind_reset_value (stmt);
699 dta.changed = true;
700 }
701 }
a7c17572 702 else if (gimple_clobber_p (stmt))
703 {
704 stmt = gimple_build_nop ();
705 gsi_replace (gsi, stmt, false);
706 dta.changed = true;
707 }
9845d120 708 else
ad57283e 709 {
710 dta.gsi = gsi;
711 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
712 }
28c92cbb 713
714 if (dta.changed)
715 update_stmt (stmt);
716}
717
e06f9c34 718/* Eliminates the references to local variables from the single entry
719 single exit region between the ENTRY and EXIT edges.
48e1416a 720
cb7f680b 721 This includes:
48e1416a 722 1) Taking address of a local variable -- these are moved out of the
 723 region (and a temporary variable is created to hold the address if
cb7f680b 724 necessary).
e06f9c34 725
28c92cbb 726 2) Dereferencing a local variable -- these are replaced with indirect
cb7f680b 727 references. */
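
/* An illustrative sketch, with x a local variable of the original function
   and x.addr standing for the SSA name that take_address_of creates on the
   ENTRY edge (the name x.addr is made up for exposition):

     ... = x;        becomes      ... = *x.addr;
     p_1 = &x;       becomes      p_1 = x.addr;
  */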
28c92cbb 728
729static void
e06f9c34 730eliminate_local_variables (edge entry, edge exit)
28c92cbb 731{
e06f9c34 732 basic_block bb;
f1f41a6c 733 vec<basic_block> body;
734 body.create (3);
28c92cbb 735 unsigned i;
75a70cf9 736 gimple_stmt_iterator gsi;
ad57283e 737 bool has_debug_stmt = false;
d9dd21a8 738 int_tree_htab_type decl_address;
739 decl_address.create (10);
e06f9c34 740 basic_block entry_bb = entry->src;
741 basic_block exit_bb = exit->dest;
28c92cbb 742
e06f9c34 743 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
28c92cbb 744
f1f41a6c 745 FOR_EACH_VEC_ELT (body, i, bb)
e06f9c34 746 if (bb != entry_bb && bb != exit_bb)
75a70cf9 747 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
841424cc 748 if (is_gimple_debug (gsi_stmt (gsi)))
749 {
750 if (gimple_debug_bind_p (gsi_stmt (gsi)))
751 has_debug_stmt = true;
752 }
ad57283e 753 else
754 eliminate_local_variables_stmt (entry, &gsi, decl_address);
755
756 if (has_debug_stmt)
f1f41a6c 757 FOR_EACH_VEC_ELT (body, i, bb)
ad57283e 758 if (bb != entry_bb && bb != exit_bb)
759 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
760 if (gimple_debug_bind_p (gsi_stmt (gsi)))
761 eliminate_local_variables_stmt (entry, &gsi, decl_address);
28c92cbb 762
d9dd21a8 763 decl_address.dispose ();
f1f41a6c 764 body.release ();
e06f9c34 765}
766
767/* Returns true if expression EXPR is not defined between ENTRY and
768 EXIT, i.e. if all its operands are defined outside of the region. */
769
770static bool
771expr_invariant_in_region_p (edge entry, edge exit, tree expr)
772{
773 basic_block entry_bb = entry->src;
774 basic_block exit_bb = exit->dest;
775 basic_block def_bb;
e06f9c34 776
777 if (is_gimple_min_invariant (expr))
778 return true;
779
780 if (TREE_CODE (expr) == SSA_NAME)
781 {
75a70cf9 782 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
e06f9c34 783 if (def_bb
784 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
785 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
786 return false;
787
788 return true;
789 }
790
75a70cf9 791 return false;
28c92cbb 792}
793
794/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
795 The copies are stored to NAME_COPIES, if NAME was already duplicated,
796 its duplicate stored in NAME_COPIES is returned.
48e1416a 797
28c92cbb 798 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
799 duplicated, storing the copies in DECL_COPIES. */
800
801static tree
d9dd21a8 802separate_decls_in_region_name (tree name, name_to_copy_table_type name_copies,
803 int_tree_htab_type decl_copies, bool copy_name_p)
28c92cbb 804{
805 tree copy, var, var_copy;
806 unsigned idx, uid, nuid;
807 struct int_tree_map ielt, *nielt;
808 struct name_to_copy_elt elt, *nelt;
d9dd21a8 809 name_to_copy_elt **slot;
810 int_tree_map **dslot;
28c92cbb 811
812 if (TREE_CODE (name) != SSA_NAME)
813 return name;
814
815 idx = SSA_NAME_VERSION (name);
816 elt.version = idx;
d9dd21a8 817 slot = name_copies.find_slot_with_hash (&elt, idx,
818 copy_name_p ? INSERT : NO_INSERT);
28c92cbb 819 if (slot && *slot)
d9dd21a8 820 return (*slot)->new_name;
28c92cbb 821
ec11736b 822 if (copy_name_p)
823 {
824 copy = duplicate_ssa_name (name, NULL);
825 nelt = XNEW (struct name_to_copy_elt);
826 nelt->version = idx;
827 nelt->new_name = copy;
828 nelt->field = NULL_TREE;
829 *slot = nelt;
830 }
831 else
832 {
833 gcc_assert (!slot);
834 copy = name;
835 }
836
28c92cbb 837 var = SSA_NAME_VAR (name);
ec11736b 838 if (!var)
839 return copy;
840
28c92cbb 841 uid = DECL_UID (var);
842 ielt.uid = uid;
d9dd21a8 843 dslot = decl_copies.find_slot_with_hash (&ielt, uid, INSERT);
28c92cbb 844 if (!*dslot)
845 {
846 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
55ed4df6 847 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
28c92cbb 848 nielt = XNEW (struct int_tree_map);
849 nielt->uid = uid;
850 nielt->to = var_copy;
851 *dslot = nielt;
852
853 /* Ensure that when we meet this decl next time, we won't duplicate
cb7f680b 854 it again. */
28c92cbb 855 nuid = DECL_UID (var_copy);
856 ielt.uid = nuid;
d9dd21a8 857 dslot = decl_copies.find_slot_with_hash (&ielt, nuid, INSERT);
28c92cbb 858 gcc_assert (!*dslot);
859 nielt = XNEW (struct int_tree_map);
860 nielt->uid = nuid;
861 nielt->to = var_copy;
862 *dslot = nielt;
863 }
864 else
865 var_copy = ((struct int_tree_map *) *dslot)->to;
866
3b652cc1 867 replace_ssa_name_symbol (copy, var_copy);
28c92cbb 868 return copy;
869}
870
e06f9c34 871/* Finds the ssa names used in STMT that are defined outside the
872 region between ENTRY and EXIT and replaces such ssa names with
873 their duplicates. The duplicates are stored to NAME_COPIES. Base
874 decls of all ssa names used in STMT (including those defined in
875 LOOP) are replaced with the new temporary variables; the
876 replacement decls are stored in DECL_COPIES. */
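
/* For example (the SSA names are made up): in a statement

     a_7 = n_3 + a_6;

   inside the region, where n_3 is defined outside the region, the use of
   n_3 is replaced by a fresh duplicate (recorded in NAME_COPIES and later
   given a field in the .paral_data structure), while a_7 and a_6, which are
   defined inside the region, keep their names and only have their base
   decls replaced by the new temporaries recorded in DECL_COPIES.  */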
28c92cbb 877
878static void
75a70cf9 879separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
d9dd21a8 880 name_to_copy_table_type name_copies,
881 int_tree_htab_type decl_copies)
28c92cbb 882{
883 use_operand_p use;
884 def_operand_p def;
885 ssa_op_iter oi;
886 tree name, copy;
887 bool copy_name_p;
888
28c92cbb 889 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
cb7f680b 890 {
891 name = DEF_FROM_PTR (def);
892 gcc_assert (TREE_CODE (name) == SSA_NAME);
e06f9c34 893 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
894 false);
cb7f680b 895 gcc_assert (copy == name);
896 }
28c92cbb 897
898 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
cb7f680b 899 {
900 name = USE_FROM_PTR (use);
901 if (TREE_CODE (name) != SSA_NAME)
902 continue;
903
e06f9c34 904 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
905 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
906 copy_name_p);
cb7f680b 907 SET_USE (use, copy);
908 }
28c92cbb 909}
910
9845d120 911/* Finds the ssa names and the base decl used in the debug stmt STMT and
 912 replaces them with the duplicates recorded in NAME_COPIES and DECL_COPIES
 913 by separate_decls_in_region_stmt. If a used ssa name has no recorded
 914 copy, the bind value is reset instead. Returns true if STMT should be
 915 removed, e.g. because there is no copy of the variable it binds in the
 916 region. */
917
918static bool
d9dd21a8 919separate_decls_in_region_debug (gimple stmt,
920 name_to_copy_table_type name_copies,
921 int_tree_htab_type decl_copies)
9845d120 922{
923 use_operand_p use;
924 ssa_op_iter oi;
925 tree var, name;
926 struct int_tree_map ielt;
927 struct name_to_copy_elt elt;
d9dd21a8 928 name_to_copy_elt **slot;
929 int_tree_map **dslot;
9845d120 930
841424cc 931 if (gimple_debug_bind_p (stmt))
932 var = gimple_debug_bind_get_var (stmt);
933 else if (gimple_debug_source_bind_p (stmt))
934 var = gimple_debug_source_bind_get_var (stmt);
935 else
936 return true;
eee873f6 937 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
9e3c8673 938 return true;
9845d120 939 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
940 ielt.uid = DECL_UID (var);
d9dd21a8 941 dslot = decl_copies.find_slot_with_hash (&ielt, ielt.uid, NO_INSERT);
9845d120 942 if (!dslot)
943 return true;
841424cc 944 if (gimple_debug_bind_p (stmt))
945 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
946 else if (gimple_debug_source_bind_p (stmt))
947 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
9845d120 948
949 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
950 {
951 name = USE_FROM_PTR (use);
952 if (TREE_CODE (name) != SSA_NAME)
953 continue;
954
955 elt.version = SSA_NAME_VERSION (name);
d9dd21a8 956 slot = name_copies.find_slot_with_hash (&elt, elt.version, NO_INSERT);
9845d120 957 if (!slot)
958 {
959 gimple_debug_bind_reset_value (stmt);
960 update_stmt (stmt);
961 break;
962 }
963
d9dd21a8 964 SET_USE (use, (*slot)->new_name);
9845d120 965 }
966
967 return false;
968}
969
848674d0 970/* Callback for htab_traverse. Adds a field corresponding to the reduction
971 specified in SLOT. The type is passed in DATA. */
972
d9dd21a8 973int
974add_field_for_reduction (reduction_info **slot, tree type)
cb7f680b 975{
48e1416a 976
d9dd21a8 977 struct reduction_info *const red = *slot;
59c0ed80 978 tree var = gimple_assign_lhs (red->reduc_stmt);
979 tree field = build_decl (gimple_location (red->reduc_stmt), FIELD_DECL,
980 SSA_NAME_IDENTIFIER (var), TREE_TYPE (var));
848674d0 981
982 insert_field_into_struct (type, field);
983
984 red->field = field;
985
986 return 1;
987}
cb7f680b 988
28c92cbb 989/* Callback for htab_traverse. Adds a field corresponding to a ssa name
48e1416a 990 described in SLOT. The type is passed in DATA. */
28c92cbb 991
d9dd21a8 992int
993add_field_for_name (name_to_copy_elt **slot, tree type)
28c92cbb 994{
d9dd21a8 995 struct name_to_copy_elt *const elt = *slot;
28c92cbb 996 tree name = ssa_name (elt->version);
ec11736b 997 tree field = build_decl (UNKNOWN_LOCATION,
998 FIELD_DECL, SSA_NAME_IDENTIFIER (name),
999 TREE_TYPE (name));
28c92cbb 1000
1001 insert_field_into_struct (type, field);
1002 elt->field = field;
cb7f680b 1003
28c92cbb 1004 return 1;
1005}
1006
48e1416a 1007/* Callback for htab_traverse. A local result is the intermediate result
1008 computed by a single
f0b5f617 1009 thread, or the initial value in case no iteration was executed.
48e1416a 1010 This function creates a phi node reflecting these values.
1011 The phi's result will be stored in NEW_PHI field of the
1012 reduction's data structure. */
cb7f680b 1013
d9dd21a8 1014int
1015create_phi_for_local_result (reduction_info **slot, struct loop *loop)
cb7f680b 1016{
d9dd21a8 1017 struct reduction_info *const reduc = *slot;
cb7f680b 1018 edge e;
75a70cf9 1019 gimple new_phi;
cb7f680b 1020 basic_block store_bb;
1021 tree local_res;
efbcb6de 1022 source_location locus;
cb7f680b 1023
48e1416a 1024 /* STORE_BB is the block where the phi
1025 should be stored. It is the destination of the loop exit.
75a70cf9 1026 (Find the fallthru edge from GIMPLE_OMP_CONTINUE). */
cb7f680b 1027 store_bb = FALLTHRU_EDGE (loop->latch)->dest;
1028
1029 /* STORE_BB has two predecessors. One coming from the loop
1030 (the reduction's result is computed at the loop),
48e1416a 1031 and another coming from a block preceding the loop,
 1032 when no iterations are executed
 1033 (the initial value should be taken). */
cb7f680b 1034 if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
1035 e = EDGE_PRED (store_bb, 1);
1036 else
1037 e = EDGE_PRED (store_bb, 0);
7ecda5e8 1038 local_res = copy_ssa_name (gimple_assign_lhs (reduc->reduc_stmt), NULL);
efbcb6de 1039 locus = gimple_location (reduc->reduc_stmt);
cb7f680b 1040 new_phi = create_phi_node (local_res, store_bb);
60d535d2 1041 add_phi_arg (new_phi, reduc->init, e, locus);
75a70cf9 1042 add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
60d535d2 1043 FALLTHRU_EDGE (loop->latch), locus);
cb7f680b 1044 reduc->new_phi = new_phi;
1045
1046 return 1;
1047}
28c92cbb 1048
1049struct clsn_data
1050{
1051 tree store;
1052 tree load;
1053
1054 basic_block store_bb;
1055 basic_block load_bb;
1056};
1057
cb7f680b 1058/* Callback for htab_traverse. Create an atomic instruction for the
48e1416a 1059 reduction described in SLOT.
cb7f680b 1060 DATA annotates the place in memory the atomic operation relates to,
1061 and the basic block it needs to be generated in. */
1062
d9dd21a8 1063int
1064create_call_for_reduction_1 (reduction_info **slot, struct clsn_data *clsn_data)
cb7f680b 1065{
d9dd21a8 1066 struct reduction_info *const reduc = *slot;
75a70cf9 1067 gimple_stmt_iterator gsi;
cb7f680b 1068 tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
cb7f680b 1069 tree load_struct;
1070 basic_block bb;
1071 basic_block new_bb;
1072 edge e;
f018d957 1073 tree t, addr, ref, x;
75a70cf9 1074 tree tmp_load, name;
1075 gimple load;
cb7f680b 1076
182cf5a9 1077 load_struct = build_simple_mem_ref (clsn_data->load);
cb7f680b 1078 t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);
cb7f680b 1079
1080 addr = build_addr (t, current_function_decl);
1081
1082 /* Create phi node. */
1083 bb = clsn_data->load_bb;
1084
1085 e = split_block (bb, t);
1086 new_bb = e->dest;
1087
1088 tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
cb7f680b 1089 tmp_load = make_ssa_name (tmp_load, NULL);
75a70cf9 1090 load = gimple_build_omp_atomic_load (tmp_load, addr);
cb7f680b 1091 SSA_NAME_DEF_STMT (tmp_load) = load;
75a70cf9 1092 gsi = gsi_start_bb (new_bb);
1093 gsi_insert_after (&gsi, load, GSI_NEW_STMT);
cb7f680b 1094
1095 e = split_block (new_bb, load);
1096 new_bb = e->dest;
75a70cf9 1097 gsi = gsi_start_bb (new_bb);
cb7f680b 1098 ref = tmp_load;
75a70cf9 1099 x = fold_build2 (reduc->reduction_code,
1100 TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
1101 PHI_RESULT (reduc->new_phi));
cb7f680b 1102
75a70cf9 1103 name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
1104 GSI_CONTINUE_LINKING);
cb7f680b 1105
75a70cf9 1106 gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
cb7f680b 1107 return 1;
1108}
1109
48e1416a 1110/* Create the atomic operation at the join point of the threads.
1111 REDUCTION_LIST describes the reductions in the LOOP.
1112 LD_ST_DATA describes the shared data structure where
cb7f680b 1113 shared data is stored in and loaded from. */
1114static void
d9dd21a8 1115create_call_for_reduction (struct loop *loop,
1116 reduction_info_table_type reduction_list,
cb7f680b 1117 struct clsn_data *ld_st_data)
1118{
d9dd21a8 1119 reduction_list.traverse <struct loop *, create_phi_for_local_result> (loop);
75a70cf9 1120 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
cb7f680b 1121 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
d9dd21a8 1122 reduction_list
1123 .traverse <struct clsn_data *, create_call_for_reduction_1> (ld_st_data);
cb7f680b 1124}
1125
5bb62c99 1126/* Callback for htab_traverse. Loads the final reduction value at the
1127 join point of all threads, and inserts it in the right place. */
cb7f680b 1128
d9dd21a8 1129int
1130create_loads_for_reductions (reduction_info **slot, struct clsn_data *clsn_data)
cb7f680b 1131{
d9dd21a8 1132 struct reduction_info *const red = *slot;
75a70cf9 1133 gimple stmt;
1134 gimple_stmt_iterator gsi;
1135 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
cb7f680b 1136 tree load_struct;
5bb62c99 1137 tree name;
cb7f680b 1138 tree x;
1139
75a70cf9 1140 gsi = gsi_after_labels (clsn_data->load_bb);
182cf5a9 1141 load_struct = build_simple_mem_ref (clsn_data->load);
cb7f680b 1142 load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
1143 NULL_TREE);
cb7f680b 1144
5bb62c99 1145 x = load_struct;
cb7f680b 1146 name = PHI_RESULT (red->keep_res);
75a70cf9 1147 stmt = gimple_build_assign (name, x);
cb7f680b 1148 SSA_NAME_DEF_STMT (name) = stmt;
1149
75a70cf9 1150 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
cb7f680b 1151
75a70cf9 1152 for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
1153 !gsi_end_p (gsi); gsi_next (&gsi))
1154 if (gsi_stmt (gsi) == red->keep_res)
1155 {
1156 remove_phi_node (&gsi, false);
1157 return 1;
1158 }
1159 gcc_unreachable ();
cb7f680b 1160}
1161
48e1416a 1162/* Load the reduction result that was stored in LD_ST_DATA.
cb7f680b 1163 REDUCTION_LIST describes the list of reductions that the
f0b5f617 1164 loads should be generated for. */
cb7f680b 1165static void
d9dd21a8 1166create_final_loads_for_reduction (reduction_info_table_type reduction_list,
cb7f680b 1167 struct clsn_data *ld_st_data)
1168{
75a70cf9 1169 gimple_stmt_iterator gsi;
cb7f680b 1170 tree t;
75a70cf9 1171 gimple stmt;
cb7f680b 1172
75a70cf9 1173 gsi = gsi_after_labels (ld_st_data->load_bb);
cb7f680b 1174 t = build_fold_addr_expr (ld_st_data->store);
75a70cf9 1175 stmt = gimple_build_assign (ld_st_data->load, t);
cb7f680b 1176
75a70cf9 1177 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1178 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
cb7f680b 1179
d9dd21a8 1180 reduction_list
1181 .traverse <struct clsn_data *, create_loads_for_reductions> (ld_st_data);
cb7f680b 1182
1183}
1184
848674d0 1185/* Callback for htab_traverse. Store the neutral value for the
1186 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1187 1 for MULT_EXPR, etc. into the reduction field.
48e1416a 1188 The reduction is specified in SLOT. The store information is
1189 passed in DATA. */
848674d0 1190
d9dd21a8 1191int
1192create_stores_for_reduction (reduction_info **slot, struct clsn_data *clsn_data)
848674d0 1193{
d9dd21a8 1194 struct reduction_info *const red = *slot;
75a70cf9 1195 tree t;
1196 gimple stmt;
1197 gimple_stmt_iterator gsi;
1198 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1199
1200 gsi = gsi_last_bb (clsn_data->store_bb);
1201 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1202 stmt = gimple_build_assign (t, red->initial_value);
75a70cf9 1203 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
848674d0 1204
1205 return 1;
1206}
1207
cb7f680b 1208/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1209 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1210 specified in SLOT. */
1211
d9dd21a8 1212int
1213create_loads_and_stores_for_name (name_to_copy_elt **slot,
1214 struct clsn_data *clsn_data)
28c92cbb 1215{
d9dd21a8 1216 struct name_to_copy_elt *const elt = *slot;
75a70cf9 1217 tree t;
1218 gimple stmt;
1219 gimple_stmt_iterator gsi;
28c92cbb 1220 tree type = TREE_TYPE (elt->new_name);
28c92cbb 1221 tree load_struct;
1222
75a70cf9 1223 gsi = gsi_last_bb (clsn_data->store_bb);
1224 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1225 stmt = gimple_build_assign (t, ssa_name (elt->version));
75a70cf9 1226 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
28c92cbb 1227
75a70cf9 1228 gsi = gsi_last_bb (clsn_data->load_bb);
182cf5a9 1229 load_struct = build_simple_mem_ref (clsn_data->load);
75a70cf9 1230 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1231 stmt = gimple_build_assign (elt->new_name, t);
28c92cbb 1232 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
75a70cf9 1233 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
28c92cbb 1234
1235 return 1;
1236}
1237
1238/* Moves all the variables used in LOOP and defined outside of it (including
1239 the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
1240 name) to a structure created for this purpose. The code
48e1416a 1241
28c92cbb 1242 while (1)
1243 {
1244 use (a);
1245 use (b);
1246 }
1247
1248 is transformed this way:
1249
1250 bb0:
1251 old.a = a;
1252 old.b = b;
1253
1254 bb1:
1255 a' = new->a;
1256 b' = new->b;
1257 while (1)
1258 {
1259 use (a');
1260 use (b');
1261 }
1262
1263 `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT. The
1264 pointer `new' is intentionally not initialized (the loop will be split to a
1265 separate function later, and `new' will be initialized from its arguments).
cb7f680b 1266 LD_ST_DATA holds information about the shared data structure used to pass
48e1416a 1267 information among the threads. It is initialized here, and
1268 gen_parallel_loop will pass it to create_call_for_reduction that
1269 needs this information. REDUCTION_LIST describes the reductions
cb7f680b 1270 in LOOP. */
28c92cbb 1271
1272static void
d9dd21a8 1273separate_decls_in_region (edge entry, edge exit,
1274 reduction_info_table_type reduction_list,
48e1416a 1275 tree *arg_struct, tree *new_arg_struct,
e06f9c34 1276 struct clsn_data *ld_st_data)
cb7f680b 1277
28c92cbb 1278{
e06f9c34 1279 basic_block bb1 = split_edge (entry);
28c92cbb 1280 basic_block bb0 = single_pred (bb1);
d9dd21a8 1281 name_to_copy_table_type name_copies;
1282 name_copies.create (10);
1283 int_tree_htab_type decl_copies;
1284 decl_copies.create (10);
28c92cbb 1285 unsigned i;
75a70cf9 1286 tree type, type_name, nvar;
1287 gimple_stmt_iterator gsi;
28c92cbb 1288 struct clsn_data clsn_data;
f1f41a6c 1289 vec<basic_block> body;
1290 body.create (3);
e06f9c34 1291 basic_block bb;
1292 basic_block entry_bb = bb1;
1293 basic_block exit_bb = exit->dest;
9845d120 1294 bool has_debug_stmt = false;
28c92cbb 1295
75a70cf9 1296 entry = single_succ_edge (entry_bb);
e06f9c34 1297 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
28c92cbb 1298
f1f41a6c 1299 FOR_EACH_VEC_ELT (body, i, bb)
e06f9c34 1300 {
48e1416a 1301 if (bb != entry_bb && bb != exit_bb)
e06f9c34 1302 {
75a70cf9 1303 for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
1304 separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
1305 name_copies, decl_copies);
1306
1307 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
9845d120 1308 {
1309 gimple stmt = gsi_stmt (gsi);
1310
1311 if (is_gimple_debug (stmt))
1312 has_debug_stmt = true;
1313 else
1314 separate_decls_in_region_stmt (entry, exit, stmt,
1315 name_copies, decl_copies);
1316 }
e06f9c34 1317 }
28c92cbb 1318 }
e06f9c34 1319
9845d120 1320 /* Now process debug bind stmts. We must not create decls while
1321 processing debug stmts, so we defer their processing so as to
1322 make sure we will have debug info for as many variables as
1323 possible (all of those that were dealt with in the loop above),
1324 and discard those for which we know there's nothing we can
1325 do. */
1326 if (has_debug_stmt)
f1f41a6c 1327 FOR_EACH_VEC_ELT (body, i, bb)
9845d120 1328 if (bb != entry_bb && bb != exit_bb)
1329 {
1330 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
1331 {
1332 gimple stmt = gsi_stmt (gsi);
1333
841424cc 1334 if (is_gimple_debug (stmt))
9845d120 1335 {
841424cc 1336 if (separate_decls_in_region_debug (stmt, name_copies,
1337 decl_copies))
9845d120 1338 {
1339 gsi_remove (&gsi, true);
1340 continue;
1341 }
1342 }
1343
1344 gsi_next (&gsi);
1345 }
1346 }
1347
f1f41a6c 1348 body.release ();
28c92cbb 1349
d9dd21a8 1350 if (name_copies.elements () == 0 && reduction_list.elements () == 0)
28c92cbb 1351 {
1352 /* It may happen that there is nothing to copy (if there are only
cb7f680b 1353 loop carried and external variables in the loop). */
28c92cbb 1354 *arg_struct = NULL;
1355 *new_arg_struct = NULL;
1356 }
1357 else
1358 {
1359 /* Create the type for the structure to store the ssa names to. */
1360 type = lang_hooks.types.make_type (RECORD_TYPE);
0aecb55e 1361 type_name = build_decl (UNKNOWN_LOCATION,
e60a6f7b 1362 TYPE_DECL, create_tmp_var_name (".paral_data"),
28c92cbb 1363 type);
1364 TYPE_NAME (type) = type_name;
1365
d9dd21a8 1366 name_copies.traverse <tree, add_field_for_name> (type);
1367 if (reduction_list.is_created () && reduction_list.elements () > 0)
848674d0 1368 {
1369 /* Create the fields for reductions. */
d9dd21a8 1370 reduction_list.traverse <tree, add_field_for_reduction> (type);
848674d0 1371 }
28c92cbb 1372 layout_type (type);
48e1416a 1373
28c92cbb 1374 /* Create the loads and stores. */
1375 *arg_struct = create_tmp_var (type, ".paral_data_store");
28c92cbb 1376 nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
75a70cf9 1377 *new_arg_struct = make_ssa_name (nvar, NULL);
28c92cbb 1378
cb7f680b 1379 ld_st_data->store = *arg_struct;
1380 ld_st_data->load = *new_arg_struct;
1381 ld_st_data->store_bb = bb0;
1382 ld_st_data->load_bb = bb1;
848674d0 1383
d9dd21a8 1384 name_copies
1385 .traverse <struct clsn_data *, create_loads_and_stores_for_name>
1386 (ld_st_data);
cb7f680b 1387
5bb62c99 1388 /* Load the calculation from memory (after the join of the threads). */
1389
d9dd21a8 1390 if (reduction_list.is_created () && reduction_list.elements () > 0)
cb7f680b 1391 {
d9dd21a8 1392 reduction_list
1393 .traverse <struct clsn_data *, create_stores_for_reduction>
1394 (ld_st_data);
75a70cf9 1395 clsn_data.load = make_ssa_name (nvar, NULL);
e06f9c34 1396 clsn_data.load_bb = exit->dest;
cb7f680b 1397 clsn_data.store = ld_st_data->store;
1398 create_final_loads_for_reduction (reduction_list, &clsn_data);
1399 }
28c92cbb 1400 }
1401
d9dd21a8 1402 decl_copies.dispose ();
1403 name_copies.dispose ();
28c92cbb 1404}
1405
1406/* Bitmap containing uids of functions created by parallelization. We cannot
1407 allocate it from the default obstack, as it must live across compilation
1408 of several functions; we make it gc allocated instead. */
1409
1410static GTY(()) bitmap parallelized_functions;
1411
1412/* Returns true if FN was created by create_loop_fn. */
1413
479a6d79 1414bool
28c92cbb 1415parallelized_function_p (tree fn)
1416{
1417 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1418 return false;
1419
1420 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1421}
1422
1423/* Creates and returns an empty function that will receive the body of
1424 a parallelized loop. */
1425
1426static tree
0aecb55e 1427create_loop_fn (location_t loc)
28c92cbb 1428{
1429 char buf[100];
1430 char *tname;
1431 tree decl, type, name, t;
1432 struct function *act_cfun = cfun;
1433 static unsigned loopfn_num;
1434
5169661d 1435 loc = LOCATION_LOCUS (loc);
28c92cbb 1436 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1437 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1438 clean_symbol_name (tname);
1439 name = get_identifier (tname);
1440 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1441
0aecb55e 1442 decl = build_decl (loc, FUNCTION_DECL, name, type);
28c92cbb 1443 if (!parallelized_functions)
1444 parallelized_functions = BITMAP_GGC_ALLOC ();
1445 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1446
1447 TREE_STATIC (decl) = 1;
1448 TREE_USED (decl) = 1;
1449 DECL_ARTIFICIAL (decl) = 1;
1450 DECL_IGNORED_P (decl) = 0;
1451 TREE_PUBLIC (decl) = 0;
1452 DECL_UNINLINABLE (decl) = 1;
1453 DECL_EXTERNAL (decl) = 0;
1454 DECL_CONTEXT (decl) = NULL_TREE;
1455 DECL_INITIAL (decl) = make_node (BLOCK);
1456
0aecb55e 1457 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
28c92cbb 1458 DECL_ARTIFICIAL (t) = 1;
1459 DECL_IGNORED_P (t) = 1;
1460 DECL_RESULT (decl) = t;
1461
0aecb55e 1462 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
28c92cbb 1463 ptr_type_node);
1464 DECL_ARTIFICIAL (t) = 1;
1465 DECL_ARG_TYPE (t) = ptr_type_node;
1466 DECL_CONTEXT (t) = decl;
1467 TREE_USED (t) = 1;
1468 DECL_ARGUMENTS (decl) = t;
1469
80f2ef47 1470 allocate_struct_function (decl, false);
28c92cbb 1471
1472 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1473 it. */
c8a152f6 1474 set_cfun (act_cfun);
28c92cbb 1475
1476 return decl;
1477}
1478
28c92cbb 1479/* Moves the exit condition of LOOP to the beginning of its header, and
1480 duplicates the part of the last iteration that gets disabled to the
1481 exit of the loop. NIT is the number of iterations of the loop
1482 (used to initialize the variables in the duplicated part).
48e1416a 1483
f0b5f617 1484 TODO: the common case is that the latch of the loop is empty and immediately
28c92cbb 1485 follows the loop exit. In this case, it would be better not to copy the
1486 body of the loop, but only move the entry of the loop directly before the
1487 exit check and increase the number of iterations of the loop by one.
48e1416a 1488 This may need some additional preconditioning in case NIT = ~0.
cb7f680b 1489 REDUCTION_LIST describes the reductions in LOOP. */
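
/* An illustrative sketch of the shape change (not the exact IL): if the
   original loop tests its exit condition only after part of the body,

     header:  i_1 = PHI <0, i_2>;
              <stmts A>;
              if (... NIT ...) goto rest; else goto exit;
     rest:    <stmts B>;  i_2 = i_1 + 1;  goto header;

   it is rewritten so that the test comes first and guards the whole body,

     header:  i_1 = PHI <0, i_2>;
              if (... NIT ...) goto body; else goto exit;
     body:    <stmts A>;  <stmts B>;  i_2 = i_1 + 1;  goto header;
     exit:    <copy of stmts A>;  ...

   where the copy of <stmts A> after the exit accounts for the part of the
   final iteration that the relocated test now disables.  This is the shape
   that GIMPLE_OMP_FOR expansion expects (see the file comment at the top).  */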
28c92cbb 1490
1491static void
d9dd21a8 1492transform_to_exit_first_loop (struct loop *loop,
1493 reduction_info_table_type reduction_list,
1494 tree nit)
28c92cbb 1495{
1496 basic_block *bbs, *nbbs, ex_bb, orig_header;
1497 unsigned n;
1498 bool ok;
1499 edge exit = single_dom_exit (loop), hpred;
75a70cf9 1500 tree control, control_name, res, t;
b0fb253a 1501 gimple phi, nphi, cond_stmt, stmt, cond_nit;
75a70cf9 1502 gimple_stmt_iterator gsi;
b0fb253a 1503 tree nit_1;
28c92cbb 1504
1505 split_block_after_labels (loop->header);
1506 orig_header = single_succ (loop->header);
1507 hpred = single_succ_edge (loop->header);
1508
1509 cond_stmt = last_stmt (exit->src);
75a70cf9 1510 control = gimple_cond_lhs (cond_stmt);
1511 gcc_assert (gimple_cond_rhs (cond_stmt) == nit);
28c92cbb 1512
1513 /* Make sure that we have phi nodes on exit for all loop header phis
1514 (create_parallel_loop requires that). */
75a70cf9 1515 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
28c92cbb 1516 {
75a70cf9 1517 phi = gsi_stmt (gsi);
28c92cbb 1518 res = PHI_RESULT (phi);
874117c8 1519 t = copy_ssa_name (res, phi);
28c92cbb 1520 SET_PHI_RESULT (phi, t);
28c92cbb 1521 nphi = create_phi_node (res, orig_header);
60d535d2 1522 add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION);
28c92cbb 1523
1524 if (res == control)
1525 {
75a70cf9 1526 gimple_cond_set_lhs (cond_stmt, t);
28c92cbb 1527 update_stmt (cond_stmt);
1528 control = t;
1529 }
1530 }
2a556654 1531
28c92cbb 1532 bbs = get_loop_body_in_dom_order (loop);
b0fb253a 1533
89675e8c 1534 for (n = 0; bbs[n] != exit->src; n++)
1535 continue;
28c92cbb 1536 nbbs = XNEWVEC (basic_block, n);
75a70cf9 1537 ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
1538 bbs + 1, n, nbbs);
28c92cbb 1539 gcc_assert (ok);
1540 free (bbs);
1541 ex_bb = nbbs[0];
1542 free (nbbs);
1543
48e1416a 1544 /* Other than reductions, the only gimple reg that should be copied
75a70cf9 1545 out of the loop is the control variable. */
89675e8c 1546 exit = single_dom_exit (loop);
28c92cbb 1547 control_name = NULL_TREE;
75a70cf9 1548 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
28c92cbb 1549 {
75a70cf9 1550 phi = gsi_stmt (gsi);
28c92cbb 1551 res = PHI_RESULT (phi);
7c782c9b 1552 if (virtual_operand_p (res))
75a70cf9 1553 {
1554 gsi_next (&gsi);
1555 continue;
1556 }
28c92cbb 1557
cb7f680b 1558 /* Check if it is a part of reduction. If it is,
48e1416a 1559 keep the phi at the reduction's keep_res field. The
1560 PHI_RESULT of this phi is the resulting value of the reduction
cb7f680b 1561 variable when exiting the loop. */
1562
d9dd21a8 1563 if (reduction_list.elements () > 0)
cb7f680b 1564 {
1565 struct reduction_info *red;
1566
1567 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
cb7f680b 1568 red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
1569 if (red)
75a70cf9 1570 {
1571 red->keep_res = phi;
1572 gsi_next (&gsi);
1573 continue;
1574 }
cb7f680b 1575 }
75a70cf9 1576 gcc_assert (control_name == NULL_TREE
1577 && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
28c92cbb 1578 control_name = res;
75a70cf9 1579 remove_phi_node (&gsi, false);
28c92cbb 1580 }
1581 gcc_assert (control_name != NULL_TREE);
28c92cbb 1582
48e1416a 1583 /* Initialize the control variable to number of iterations
b0fb253a 1584 according to the rhs of the exit condition. */
75a70cf9 1585 gsi = gsi_after_labels (ex_bb);
48e1416a 1586 cond_nit = last_stmt (exit->src);
b0fb253a 1587 nit_1 = gimple_cond_rhs (cond_nit);
1588 nit_1 = force_gimple_operand_gsi (&gsi,
1589 fold_convert (TREE_TYPE (control_name), nit_1),
75a70cf9 1590 false, NULL_TREE, false, GSI_SAME_STMT);
b0fb253a 1591 stmt = gimple_build_assign (control_name, nit_1);
75a70cf9 1592 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1593 SSA_NAME_DEF_STMT (control_name) = stmt;
28c92cbb 1594}
1595
1596/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
75a70cf9 1597 LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
28c92cbb 1598 NEW_DATA is the variable that should be initialized from the argument
1599 of LOOP_FN. N_THREADS is the requested number of threads. Returns the
75a70cf9 1600 basic block containing GIMPLE_OMP_PARALLEL tree. */
28c92cbb 1601
1602static basic_block
1603create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
0aecb55e 1604 tree new_data, unsigned n_threads, location_t loc)
28c92cbb 1605{
75a70cf9 1606 gimple_stmt_iterator gsi;
28c92cbb 1607 basic_block bb, paral_bb, for_bb, ex_bb;
f018d957 1608 tree t, param;
75a70cf9 1609 gimple stmt, for_stmt, phi, cond_stmt;
1610 tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
28c92cbb 1611 edge exit, nexit, guard, end, e;
1612
75a70cf9 1613 /* Prepare the GIMPLE_OMP_PARALLEL statement. */
28c92cbb 1614 bb = loop_preheader_edge (loop)->src;
1615 paral_bb = single_pred (bb);
75a70cf9 1616 gsi = gsi_last_bb (paral_bb);
28c92cbb 1617
0aecb55e 1618 t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
28c92cbb 1619 OMP_CLAUSE_NUM_THREADS_EXPR (t)
cb7f680b 1620 = build_int_cst (integer_type_node, n_threads);
75a70cf9 1621 stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
0aecb55e 1622 gimple_set_location (stmt, loc);
28c92cbb 1623
75a70cf9 1624 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
28c92cbb 1625
1626 /* Initialize NEW_DATA. */
1627 if (data)
1628 {
75a70cf9 1629 gsi = gsi_after_labels (bb);
1630
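      /* PARAM is an SSA name for the first argument of LOOP_FN; assign it
	 the address of DATA, then initialize NEW_DATA from PARAM converted
	 to NEW_DATA's type.  */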
1631 param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
1632 stmt = gimple_build_assign (param, build_fold_addr_expr (data));
1633 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1634 SSA_NAME_DEF_STMT (param) = stmt;
1635
1636 stmt = gimple_build_assign (new_data,
1637 fold_convert (TREE_TYPE (new_data), param));
1638 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
1639 SSA_NAME_DEF_STMT (new_data) = stmt;
28c92cbb 1640 }
1641
75a70cf9 1642 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL. */
28c92cbb 1643 bb = split_loop_exit_edge (single_dom_exit (loop));
75a70cf9 1644 gsi = gsi_last_bb (bb);
0aecb55e 1645 stmt = gimple_build_omp_return (false);
1646 gimple_set_location (stmt, loc);
1647 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
28c92cbb 1648
75a70cf9 1649 /* Extract data for GIMPLE_OMP_FOR. */
28c92cbb 1650 gcc_assert (loop->header == single_dom_exit (loop)->src);
75a70cf9 1651 cond_stmt = last_stmt (loop->header);
28c92cbb 1652
75a70cf9 1653 cvar = gimple_cond_lhs (cond_stmt);
28c92cbb 1654 cvar_base = SSA_NAME_VAR (cvar);
1655 phi = SSA_NAME_DEF_STMT (cvar);
1656 cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
874117c8 1657 initvar = copy_ssa_name (cvar, NULL);
28c92cbb 1658 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
1659 initvar);
1660 cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1661
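  /* Remove the increment of the control variable from the latch; CVAR_NEXT
     will instead be defined by the GIMPLE_OMP_CONTINUE emitted into the
     latch below.  */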
4bcf12c5 1662 gsi = gsi_last_nondebug_bb (loop->latch);
75a70cf9 1663 gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
1664 gsi_remove (&gsi, true);
28c92cbb 1665
1666 /* Prepare cfg. */
1667 for_bb = split_edge (loop_preheader_edge (loop));
1668 ex_bb = split_loop_exit_edge (single_dom_exit (loop));
1669 extract_true_false_edges_from_block (loop->header, &nexit, &exit);
1670 gcc_assert (exit == single_dom_exit (loop));
1671
1672 guard = make_edge (for_bb, ex_bb, 0);
1673 single_succ_edge (loop->latch)->flags = 0;
1674 end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
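  /* For each phi in EX_BB, add arguments for the two new incoming edges:
     the GUARD edge receives the corresponding value from the loop preheader
     (the path taken when no iteration is executed), the END edge receives
     the value from the latch.  */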
75a70cf9 1675 for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
28c92cbb 1676 {
efbcb6de 1677 source_location locus;
1678 tree def;
75a70cf9 1679 phi = gsi_stmt (gsi);
75a70cf9 1680 stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));
efbcb6de 1681
1682 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
48e1416a 1683 locus = gimple_phi_arg_location_from_edge (stmt,
efbcb6de 1684 loop_preheader_edge (loop));
60d535d2 1685 add_phi_arg (phi, def, guard, locus);
efbcb6de 1686
1687 def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
1688 locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
60d535d2 1689 add_phi_arg (phi, def, end, locus);
28c92cbb 1690 }
1691 e = redirect_edge_and_branch (exit, nexit->dest);
1692 PENDING_STMT (e) = NULL;
1693
75a70cf9 1694 /* Emit GIMPLE_OMP_FOR. */
1695 gimple_cond_set_lhs (cond_stmt, cvar_base);
28c92cbb 1696 type = TREE_TYPE (cvar);
0aecb55e 1697 t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
28c92cbb 1698 OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;
1699
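  /* After canonicalize_loop_ivs the loop has a single control IV that is
     incremented by 1, so the GIMPLE_OMP_FOR uses INITVAR as its index,
     CVAR_INIT as the initial value, the rhs of the exit condition as the
     final value, and an increment of 1.  */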
3d483a94 1700 for_stmt = gimple_build_omp_for (NULL, GF_OMP_FOR_KIND_FOR, t, 1, NULL);
0aecb55e 1701 gimple_set_location (for_stmt, loc);
75a70cf9 1702 gimple_omp_for_set_index (for_stmt, 0, initvar);
1703 gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
1704 gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
1705 gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
1706 gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
1707 cvar_base,
1708 build_int_cst (type, 1)));
1709
1710 gsi = gsi_last_bb (for_bb);
1711 gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
28c92cbb 1712 SSA_NAME_DEF_STMT (initvar) = for_stmt;
1713
75a70cf9 1714 /* Emit GIMPLE_OMP_CONTINUE. */
1715 gsi = gsi_last_bb (loop->latch);
1716 stmt = gimple_build_omp_continue (cvar_next, cvar);
0aecb55e 1717 gimple_set_location (stmt, loc);
75a70cf9 1718 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1719 SSA_NAME_DEF_STMT (cvar_next) = stmt;
28c92cbb 1720
75a70cf9 1721 /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR. */
1722 gsi = gsi_last_bb (ex_bb);
0aecb55e 1723 stmt = gimple_build_omp_return (true);
1724 gimple_set_location (stmt, loc);
1725 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
28c92cbb 1726
821ac701 1727   /* After the above, the dominance info is invalid.  Re-compute it.  */
1728 free_dominance_info (CDI_DOMINATORS);
1729 calculate_dominance_info (CDI_DOMINATORS);
1730
28c92cbb 1731 return paral_bb;
1732}
1733
5fa90eea 1734/* Generates code to execute the iterations of LOOP in N_THREADS
1735 threads in parallel.
1736
 1737   NITER describes the number of iterations of LOOP.
f0b5f617 1738   REDUCTION_LIST describes the reductions present in the LOOP.  */
28c92cbb 1739
1740static void
d9dd21a8 1741gen_parallel_loop (struct loop *loop, reduction_info_table_type reduction_list,
cb7f680b 1742 unsigned n_threads, struct tree_niter_desc *niter)
28c92cbb 1743{
6f22df65 1744 loop_iterator li;
28c92cbb 1745 tree many_iterations_cond, type, nit;
75a70cf9 1746 tree arg_struct, new_arg_struct;
1747 gimple_seq stmts;
28c92cbb 1748 basic_block parallel_head;
e06f9c34 1749 edge entry, exit;
cb7f680b 1750 struct clsn_data clsn_data;
28c92cbb 1751 unsigned prob;
0aecb55e 1752 location_t loc;
1753 gimple cond_stmt;
362dc73c 1754   unsigned int m_p_thread = 2;
28c92cbb 1755
1756 /* From
1757
1758 ---------------------------------------------------------------------
1759 loop
1760 {
1761 IV = phi (INIT, IV + STEP)
1762 BODY1;
1763 if (COND)
1764 break;
1765 BODY2;
1766 }
1767 ---------------------------------------------------------------------
1768
1769 with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
1770 we generate the following code:
1771
1772 ---------------------------------------------------------------------
1773
1774 if (MAY_BE_ZERO
cb7f680b 1775 || NITER < MIN_PER_THREAD * N_THREADS)
1776 goto original;
28c92cbb 1777
1778 BODY1;
1779 store all local loop-invariant variables used in body of the loop to DATA.
75a70cf9 1780 GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
28c92cbb 1781 load the variables from DATA.
75a70cf9 1782 GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
28c92cbb 1783 BODY2;
1784 BODY1;
75a70cf9 1785 GIMPLE_OMP_CONTINUE;
1786 GIMPLE_OMP_RETURN -- GIMPLE_OMP_FOR
1787 GIMPLE_OMP_RETURN -- GIMPLE_OMP_PARALLEL
28c92cbb 1788 goto end;
1789
1790 original:
1791 loop
1792 {
1793 IV = phi (INIT, IV + STEP)
1794 BODY1;
1795 if (COND)
1796 break;
1797 BODY2;
1798 }
1799
1800 end:
1801
1802 */
1803
1804 /* Create two versions of the loop -- in the old one, we know that the
1805 number of iterations is large enough, and we will transform it into the
1806 loop that will be split to loop_fn, the new one will be used for the
1807 remaining iterations. */
cb7f680b 1808
362dc73c 1809 /* We should compute a better number-of-iterations value for outer loops.
1810 That is, if we have
1811
1812 for (i = 0; i < n; ++i)
1813 for (j = 0; j < m; ++j)
1814 ...
1815
1816 we should compute nit = n * m, not nit = n.
1817 Also may_be_zero handling would need to be adjusted. */
1818
28c92cbb 1819 type = TREE_TYPE (niter->niter);
1820 nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
1821 NULL_TREE);
1822 if (stmts)
75a70cf9 1823 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
28c92cbb 1824
362dc73c 1825 if (loop->inner)
 1826     m_p_thread = 2;
 1827   else
 1828     m_p_thread = MIN_PER_THREAD;
1829
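  /* Build the condition that selects the parallelized version: the loop
     must be known to iterate (!MAY_BE_ZERO) and provide at least
     M_P_THREAD iterations per thread.  */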
1830 many_iterations_cond =
1831 fold_build2 (GE_EXPR, boolean_type_node,
1832 nit, build_int_cst (type, m_p_thread * n_threads));
1833
28c92cbb 1834 many_iterations_cond
cb7f680b 1835 = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
1836 invert_truthvalue (unshare_expr (niter->may_be_zero)),
1837 many_iterations_cond);
28c92cbb 1838 many_iterations_cond
cb7f680b 1839 = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
28c92cbb 1840 if (stmts)
75a70cf9 1841 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
28c92cbb 1842 if (!is_gimple_condexpr (many_iterations_cond))
1843 {
1844 many_iterations_cond
cb7f680b 1845 = force_gimple_operand (many_iterations_cond, &stmts,
1846 true, NULL_TREE);
28c92cbb 1847 if (stmts)
75a70cf9 1848 gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
28c92cbb 1849 }
1850
1851 initialize_original_copy_tables ();
1852
1853 /* We assume that the loop usually iterates a lot. */
1854 prob = 4 * REG_BR_PROB_BASE / 5;
f018d957 1855 loop_version (loop, many_iterations_cond, NULL,
1856 prob, prob, REG_BR_PROB_BASE - prob, true);
28c92cbb 1857 update_ssa (TODO_update_ssa);
1858 free_original_copy_tables ();
1859
1860 /* Base all the induction variables in LOOP on a single control one. */
0207206d 1861 canonicalize_loop_ivs (loop, &nit, true);
28c92cbb 1862
1863 /* Ensure that the exit condition is the first statement in the loop. */
cb7f680b 1864 transform_to_exit_first_loop (loop, reduction_list, nit);
1865
f0b5f617 1866 /* Generate initializations for reductions. */
d9dd21a8 1867 if (reduction_list.elements () > 0)
1868 reduction_list.traverse <struct loop *, initialize_reductions> (loop);
28c92cbb 1869
1870 /* Eliminate the references to local variables from the loop. */
e06f9c34 1871 gcc_assert (single_exit (loop));
1872 entry = loop_preheader_edge (loop);
1873 exit = single_dom_exit (loop);
28c92cbb 1874
e06f9c34 1875 eliminate_local_variables (entry, exit);
28c92cbb 1876 /* In the old loop, move all variables non-local to the loop to a structure
1877 and back, and create separate decls for the variables used in loop. */
48e1416a 1878 separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
e06f9c34 1879 &new_arg_struct, &clsn_data);
28c92cbb 1880
1881 /* Create the parallel constructs. */
0aecb55e 1882 loc = UNKNOWN_LOCATION;
1883 cond_stmt = last_stmt (loop->header);
1884 if (cond_stmt)
1885 loc = gimple_location (cond_stmt);
1886 parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
1887 new_arg_struct, n_threads, loc);
d9dd21a8 1888 if (reduction_list.elements () > 0)
cb7f680b 1889 create_call_for_reduction (loop, reduction_list, &clsn_data);
28c92cbb 1890
1891 scev_reset ();
1892
1893 /* Cancel the loop (it is simpler to do it here rather than to teach the
1894 expander to do it). */
1895 cancel_loop_tree (loop);
1896
d46d3c1c 1897 /* Free loop bound estimations that could contain references to
1898 removed statements. */
1899 FOR_EACH_LOOP (li, loop, 0)
1900 free_numbers_of_iterations_estimates_loop (loop);
1901
28c92cbb 1902 /* Expand the parallel constructs. We do it directly here instead of running
1903 a separate expand_omp pass, since it is more efficient, and less likely to
1904 cause troubles with further analyses not being able to deal with the
1905 OMP trees. */
cb7f680b 1906
28c92cbb 1907 omp_expand_local (parallel_head);
1908}
1909
c968a07c 1910/* Returns true when LOOP contains vector phi nodes. */
1911
1912static bool
75a70cf9 1913loop_has_vector_phi_nodes (struct loop *loop)
c968a07c 1914{
1915 unsigned i;
1916 basic_block *bbs = get_loop_body_in_dom_order (loop);
75a70cf9 1917 gimple_stmt_iterator gsi;
c968a07c 1918 bool res = true;
c968a07c 1919
1920 for (i = 0; i < loop->num_nodes; i++)
75a70cf9 1921 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1922 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
c968a07c 1923 goto end;
1924
1925 res = false;
1926 end:
1927 free (bbs);
1928 return res;
1929}
1930
5fa90eea 1931/* Create a reduction_info struct, initialize it with REDUC_STMT
 1932   and PHI, and insert it into REDUCTION_LIST.  */
1933
1934static void
d9dd21a8 1935build_new_reduction (reduction_info_table_type reduction_list,
1936 gimple reduc_stmt, gimple phi)
5fa90eea 1937{
d9dd21a8 1938 reduction_info **slot;
5fa90eea 1939 struct reduction_info *new_reduction;
1940
1941 gcc_assert (reduc_stmt);
48e1416a 1942
5fa90eea 1943 if (dump_file && (dump_flags & TDF_DETAILS))
1944 {
1945 fprintf (dump_file,
 1946 	       "Detected reduction.  Reduction stmt is:\n");
1947 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1948 fprintf (dump_file, "\n");
1949 }
48e1416a 1950
5fa90eea 1951 new_reduction = XCNEW (struct reduction_info);
48e1416a 1952
5fa90eea 1953 new_reduction->reduc_stmt = reduc_stmt;
1954 new_reduction->reduc_phi = phi;
71fa519d 1955 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
5fa90eea 1956 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
d9dd21a8 1957 slot = reduction_list.find_slot (new_reduction, INSERT);
5fa90eea 1958 *slot = new_reduction;
1959}
1960
71fa519d 1961/* Callback for hash table traversal.  Sets gimple_uid of reduc_phi stmts.  */
1962
d9dd21a8 1963int
1964set_reduc_phi_uids (reduction_info **slot, void *data ATTRIBUTE_UNUSED)
71fa519d 1965{
d9dd21a8 1966 struct reduction_info *const red = *slot;
71fa519d 1967 gimple_set_uid (red->reduc_phi, red->reduc_version);
1968 return 1;
1969}
1970
5fa90eea 1971/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST. */
1972
1973static void
d9dd21a8 1974gather_scalar_reductions (loop_p loop, reduction_info_table_type reduction_list)
5fa90eea 1975{
1976 gimple_stmt_iterator gsi;
1977 loop_vec_info simple_loop_info;
1978
5fa90eea 1979 simple_loop_info = vect_analyze_loop_form (loop);
1980
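  /* Every loop-header phi that is not a simple induction variable is a
     reduction candidate; use the vectorizer's vect_force_simple_reduction
     on the analyzed loop form to recognize it.  */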
1981 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
1982 {
1983 gimple phi = gsi_stmt (gsi);
1984 affine_iv iv;
1985 tree res = PHI_RESULT (phi);
1986 bool double_reduc;
1987
7c782c9b 1988 if (virtual_operand_p (res))
5fa90eea 1989 continue;
1990
1991 if (!simple_iv (loop, loop, res, &iv, true)
1992 && simple_loop_info)
1993 {
f4a50267 1994 gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
1995 phi, true,
1996 &double_reduc);
b0fb253a 1997 if (reduc_stmt && !double_reduc)
5fa90eea 1998 build_new_reduction (reduction_list, reduc_stmt, phi);
1999 }
2000 }
71fa519d 2001 destroy_loop_vec_info (simple_loop_info, true);
2002
2003 /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
2004 and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
2005 only now. */
d9dd21a8 2006 reduction_list.traverse <void *, set_reduc_phi_uids> (NULL);
5fa90eea 2007}
2008
 2009/* Try to initialize NITER for the code generation part.  */
2010
2011static bool
2012try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2013{
2014 edge exit = single_dom_exit (loop);
2015
2016 gcc_assert (exit);
2017
 2018   /* We need to know the number of iterations, and there should be no uses
 2019      of values defined inside the loop outside of it, unless the values are
 2020      invariants of the loop.  */
2021 if (!number_of_iterations_exit (loop, exit, niter, false))
2022 {
2023 if (dump_file && (dump_flags & TDF_DETAILS))
2024 fprintf (dump_file, " FAILED: number of iterations not known\n");
2025 return false;
2026 }
2027
2028 return true;
2029}
2030
 2031/* Try to initialize REDUCTION_LIST for the code generation part.
2032 REDUCTION_LIST describes the reductions. */
2033
2034static bool
d9dd21a8 2035try_create_reduction_list (loop_p loop,
2036 reduction_info_table_type reduction_list)
5fa90eea 2037{
2038 edge exit = single_dom_exit (loop);
2039 gimple_stmt_iterator gsi;
2040
2041 gcc_assert (exit);
2042
2043 gather_scalar_reductions (loop, reduction_list);
2044
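  /* Every value that is live after the loop (a non-virtual phi argument on
     the exit edge) must be the final value of one of the reductions
     gathered above; otherwise the loop cannot be parallelized.  */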
5fa90eea 2046 for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
2047 {
2048 gimple phi = gsi_stmt (gsi);
2049 struct reduction_info *red;
2050 imm_use_iterator imm_iter;
2051 use_operand_p use_p;
2052 gimple reduc_phi;
2053 tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2054
7c782c9b 2055 if (!virtual_operand_p (val))
5fa90eea 2056 {
2057 if (dump_file && (dump_flags & TDF_DETAILS))
2058 {
2059 fprintf (dump_file, "phi is ");
2060 print_gimple_stmt (dump_file, phi, 0, 0);
2061 fprintf (dump_file, "arg of phi to exit: value ");
2062 print_generic_expr (dump_file, val, 0);
2063 fprintf (dump_file, " used outside loop\n");
2064 fprintf (dump_file,
 2065 			   " checking if it is part of a reduction pattern:\n");
2066 }
d9dd21a8 2067 if (reduction_list.elements () == 0)
5fa90eea 2068 {
2069 if (dump_file && (dump_flags & TDF_DETAILS))
2070 fprintf (dump_file,
 2071 			 "  FAILED: it is not part of a reduction.\n");
2072 return false;
2073 }
2074 reduc_phi = NULL;
2075 FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
2076 {
43989e16 2077 if (!gimple_debug_bind_p (USE_STMT (use_p))
2078 && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
5fa90eea 2079 {
2080 reduc_phi = USE_STMT (use_p);
2081 break;
2082 }
2083 }
2084 red = reduction_phi (reduction_list, reduc_phi);
2085 if (red == NULL)
2086 {
2087 if (dump_file && (dump_flags & TDF_DETAILS))
2088 fprintf (dump_file,
 2089 			 "  FAILED: it is not part of a reduction.\n");
2090 return false;
2091 }
2092 if (dump_file && (dump_flags & TDF_DETAILS))
2093 {
2094 fprintf (dump_file, "reduction phi is ");
2095 print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
2096 fprintf (dump_file, "reduction stmt is ");
2097 print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
2098 }
2099 }
2100 }
2101
2102 /* The iterations of the loop may communicate only through bivs whose
2103 iteration space can be distributed efficiently. */
2104 for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
2105 {
2106 gimple phi = gsi_stmt (gsi);
2107 tree def = PHI_RESULT (phi);
2108 affine_iv iv;
2109
7c782c9b 2110 if (!virtual_operand_p (def) && !simple_iv (loop, loop, def, &iv, true))
5fa90eea 2111 {
2112 struct reduction_info *red;
2113
2114 red = reduction_phi (reduction_list, phi);
2115 if (red == NULL)
2116 {
2117 if (dump_file && (dump_flags & TDF_DETAILS))
2118 fprintf (dump_file,
2119 " FAILED: scalar dependency between iterations\n");
2120 return false;
2121 }
2122 }
2123 }
2124
2126 return true;
2127}
2128
28c92cbb 2129/* Detect parallel loops and generate parallel code using libgomp
2130 primitives. Returns true if some loop was parallelized, false
2131 otherwise. */
2132
2133bool
2134parallelize_loops (void)
2135{
2136 unsigned n_threads = flag_tree_parallelize_loops;
2137 bool changed = false;
2138 struct loop *loop;
2139 struct tree_niter_desc niter_desc;
2140 loop_iterator li;
d9dd21a8 2141 reduction_info_table_type reduction_list;
1e33ad50 2142 struct obstack parloop_obstack;
fbbe5b51 2143 HOST_WIDE_INT estimated;
2144 LOC loop_loc;
1e33ad50 2145
28c92cbb 2146 /* Do not parallelize loops in the functions created by parallelization. */
2147 if (parallelized_function_p (cfun->decl))
2148 return false;
fbbe5b51 2149 if (cfun->has_nonlocal_label)
2150 return false;
28c92cbb 2151
1e33ad50 2152 gcc_obstack_init (&parloop_obstack);
d9dd21a8 2153 reduction_list.create (10);
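  /* The stmt_vec_info machinery is initialized here because the reduction
     detection below (gather_scalar_reductions) reuses the vectorizer's
     loop analysis.  */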
75a70cf9 2154 init_stmt_vec_info_vec ();
cb7f680b 2155
28c92cbb 2156 FOR_EACH_LOOP (li, loop, 0)
2157 {
d9dd21a8 2158 reduction_list.empty ();
b0fb253a 2159 if (dump_file && (dump_flags & TDF_DETAILS))
2160 {
 2161 	  fprintf (dump_file, "Trying loop %d as candidate\n", loop->num);
 2162 	  if (loop->inner)
 2163 	    fprintf (dump_file, "loop %d is not innermost\n", loop->num);
 2164 	  else
 2165 	    fprintf (dump_file, "loop %d is innermost\n", loop->num);
2166 }
48e1416a 2167
b0fb253a 2168       /* If autopar is used from the graphite pass, rely on the dependency
525c22c4 2169 	 checking results that graphite has already recorded.  */
2170 if (flag_loop_parallelize_all && !loop->can_be_parallel)
b0fb253a 2171 {
2172 if (dump_file && (dump_flags & TDF_DETAILS))
2173 fprintf (dump_file, "loop is not parallel according to graphite\n");
525c22c4 2174 continue;
b0fb253a 2175 }
525c22c4 2176
b0fb253a 2177 if (!single_dom_exit (loop))
 2178 	{
b0fb253a 2180 	  if (dump_file && (dump_flags & TDF_DETAILS))
 2181 	    fprintf (dump_file, "loop has no single dominating exit\n");
5fa90eea 2183 	  continue;
b0fb253a 2184 	}
5fa90eea 2185
2186 if (/* And of course, the loop must be parallelizable. */
2187 !can_duplicate_loop_p (loop)
d4fcfd16 2188 || loop_has_blocks_with_irreducible_flag (loop)
fbbe5b51 2189 || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
c968a07c 2190 /* FIXME: the check for vector phi nodes could be removed. */
89675e8c 2191 || loop_has_vector_phi_nodes (loop))
5fa90eea 2192 continue;
b0b097b4 2193
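      /* Estimate the number of statements the loop executes; unless
	 -floop-parallelize-all is given, loops that execute too few
	 statements (at most N_THREADS * MIN_PER_THREAD) or that sit in
	 cold code are not worth the thread start-up cost and are skipped.  */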
fee017b3 2194 estimated = estimated_stmt_executions_int (loop);
b0b097b4 2195 if (estimated == -1)
2196 estimated = max_stmt_executions_int (loop);
525c22c4 2197 /* FIXME: Bypass this check as graphite doesn't update the
b0b097b4 2198 count and frequency correctly now. */
525c22c4 2199 if (!flag_loop_parallelize_all
b0b097b4 2200 && ((estimated != -1
2201 && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
525c22c4 2202 /* Do not bother with loops in cold areas. */
2203 || optimize_loop_nest_for_size_p (loop)))
5fa90eea 2204 continue;
48e1416a 2205
5fa90eea 2206 if (!try_get_loop_niter (loop, &niter_desc))
2207 continue;
2208
2209 if (!try_create_reduction_list (loop, reduction_list))
2210 continue;
2211
1e33ad50 2212 if (!flag_loop_parallelize_all
2213 && !loop_parallel_p (loop, &parloop_obstack))
28c92cbb 2214 continue;
2215
2216 changed = true;
b0fb253a 2217 if (dump_file && (dump_flags & TDF_DETAILS))
2218 {
b0fb253a 2219 if (loop->inner)
fbbe5b51 2220 	    fprintf (dump_file, "parallelizing outer loop %d\n", loop->header->index);
b0fb253a 2221 	  else
fbbe5b51 2222 	    fprintf (dump_file, "parallelizing inner loop %d\n", loop->header->index);
2223 loop_loc = find_loop_location (loop);
2224 if (loop_loc != UNKNOWN_LOC)
2225 fprintf (dump_file, "\nloop at %s:%d: ",
2226 LOC_FILE (loop_loc), LOC_LINE (loop_loc));
48e1416a 2227 }
2228 gen_parallel_loop (loop, reduction_list,
5fa90eea 2229 n_threads, &niter_desc);
28c92cbb 2230 }
2231
75a70cf9 2232 free_stmt_vec_info_vec ();
d9dd21a8 2233 reduction_list.dispose ();
1e33ad50 2234 obstack_free (&parloop_obstack, NULL);
7f81b5ee 2235
2236 /* Parallelization will cause new function calls to be inserted through
cb245216 2237 which local variables will escape. Reset the points-to solution
2238 for ESCAPED. */
7f81b5ee 2239 if (changed)
cb245216 2240 pt_solution_reset (&cfun->gimple_df->escaped);
7f81b5ee 2241
28c92cbb 2242 return changed;
2243}
2244
64641360 2245/* Parallelization. */
2246
2247static bool
2248gate_tree_parallelize_loops (void)
2249{
2250 return flag_tree_parallelize_loops > 1;
2251}
2252
2253static unsigned
2254tree_parallelize_loops (void)
2255{
2256 if (number_of_loops (cfun) <= 1)
2257 return 0;
2258
2259 if (parallelize_loops ())
2260 return TODO_cleanup_cfg | TODO_rebuild_alias;
2261 return 0;
2262}
2263
2264namespace {
2265
2266const pass_data pass_data_parallelize_loops =
2267{
2268 GIMPLE_PASS, /* type */
2269 "parloops", /* name */
2270 OPTGROUP_LOOP, /* optinfo_flags */
2271 true, /* has_gate */
2272 true, /* has_execute */
2273 TV_TREE_PARALLELIZE_LOOPS, /* tv_id */
2274 ( PROP_cfg | PROP_ssa ), /* properties_required */
2275 0, /* properties_provided */
2276 0, /* properties_destroyed */
2277 0, /* todo_flags_start */
2278 TODO_verify_flow, /* todo_flags_finish */
2279};
2280
2281class pass_parallelize_loops : public gimple_opt_pass
2282{
2283public:
2284 pass_parallelize_loops (gcc::context *ctxt)
2285 : gimple_opt_pass (pass_data_parallelize_loops, ctxt)
2286 {}
2287
2288 /* opt_pass methods: */
2289 bool gate () { return gate_tree_parallelize_loops (); }
2290 unsigned int execute () { return tree_parallelize_loops (); }
2291
2292}; // class pass_parallelize_loops
2293
2294} // anon namespace
2295
2296gimple_opt_pass *
2297make_pass_parallelize_loops (gcc::context *ctxt)
2298{
2299 return new pass_parallelize_loops (ctxt);
2300}
2301
2302
28c92cbb 2303#include "gt-tree-parloops.h"