]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-parloops.c
[multiple changes]
[thirdparty/gcc.git] / gcc / tree-parloops.c
CommitLineData
5f40b3cb 1/* Loop autoparallelization.
e08120b1 2 Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011, 2012
c75c517d 3 Free Software Foundation, Inc.
70837b71
RL
4 Contributed by Sebastian Pop <pop@cri.ensmp.fr>
5 Zdenek Dvorak <dvorakz@suse.cz> and Razya Ladelsky <razya@il.ibm.com>.
5f40b3cb
ZD
6
7This file is part of GCC.
8
9GCC is free software; you can redistribute it and/or modify it under
10the terms of the GNU General Public License as published by the Free
6da7fc87 11Software Foundation; either version 3, or (at your option) any later
5f40b3cb
ZD
12version.
13
14GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15WARRANTY; without even the implied warranty of MERCHANTABILITY or
16FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17for more details.
18
19You should have received a copy of the GNU General Public License
6da7fc87
NC
20along with GCC; see the file COPYING3. If not see
21<http://www.gnu.org/licenses/>. */
5f40b3cb
ZD
22
23#include "config.h"
24#include "system.h"
25#include "coretypes.h"
5f40b3cb
ZD
26#include "tree-flow.h"
27#include "cfgloop.h"
5f40b3cb 28#include "tree-data-ref.h"
1bd6497c 29#include "tree-scalar-evolution.h"
cf835838 30#include "gimple-pretty-print.h"
5f40b3cb 31#include "tree-pass.h"
5f40b3cb 32#include "langhooks.h"
a509ebb5 33#include "tree-vectorizer.h"
5f40b3cb
ZD
34
35/* This pass tries to distribute iterations of loops into several threads.
36 The implementation is straightforward -- for each loop we test whether its
37 iterations are independent, and if it is the case (and some additional
38 conditions regarding profitability and correctness are satisfied), we
726a989a
RB
39 add GIMPLE_OMP_PARALLEL and GIMPLE_OMP_FOR codes and let omp expansion
40 machinery do its job.
b8698a0f 41
5f40b3cb
ZD
42 The most of the complexity is in bringing the code into shape expected
43 by the omp expanders:
726a989a
RB
44 -- for GIMPLE_OMP_FOR, ensuring that the loop has only one induction
45 variable and that the exit test is at the start of the loop body
46 -- for GIMPLE_OMP_PARALLEL, replacing the references to local addressable
5f40b3cb
ZD
47 variables by accesses through pointers, and breaking up ssa chains
48 by storing the values incoming to the parallelized loop to a structure
49 passed to the new function as an argument (something similar is done
50 in omp gimplification, unfortunately only a small part of the code
51 can be shared).
52
53 TODO:
54 -- if there are several parallelizable loops in a function, it may be
55 possible to generate the threads just once (using synchronization to
56 ensure that cross-loop dependences are obeyed).
70837b71
RL
57 -- handling of common reduction patterns for outer loops.
58
59 More info can also be found at http://gcc.gnu.org/wiki/AutoParInGCC */
b8698a0f 60/*
a509ebb5 61 Reduction handling:
8a9ecffd 62 currently we use vect_force_simple_reduction() to detect reduction patterns.
a509ebb5 63 The code transformation will be introduced by an example.
b8698a0f
L
64
65
a509ebb5
RL
66parloop
67{
68 int sum=1;
69
0eb7e7aa 70 for (i = 0; i < N; i++)
a509ebb5
RL
71 {
72 x[i] = i + 3;
73 sum+=x[i];
74 }
75}
76
0eb7e7aa 77gimple-like code:
a509ebb5
RL
78header_bb:
79
0eb7e7aa
RL
80 # sum_29 = PHI <sum_11(5), 1(3)>
81 # i_28 = PHI <i_12(5), 0(3)>
82 D.1795_8 = i_28 + 3;
83 x[i_28] = D.1795_8;
84 sum_11 = D.1795_8 + sum_29;
85 i_12 = i_28 + 1;
86 if (N_6(D) > i_12)
87 goto header_bb;
88
a509ebb5
RL
89
90exit_bb:
91
0eb7e7aa
RL
92 # sum_21 = PHI <sum_11(4)>
93 printf (&"%d"[0], sum_21);
a509ebb5
RL
94
95
96after reduction transformation (only relevant parts):
97
98parloop
99{
100
101....
102
0eb7e7aa 103
fa10beec 104 # Storing the initial value given by the user. #
0eb7e7aa 105
ae0bce62 106 .paral_data_store.32.sum.27 = 1;
b8698a0f
L
107
108 #pragma omp parallel num_threads(4)
a509ebb5 109
0eb7e7aa 110 #pragma omp for schedule(static)
ae0bce62
RL
111
112 # The neutral element corresponding to the particular
113 reduction's operation, e.g. 0 for PLUS_EXPR,
114 1 for MULT_EXPR, etc. replaces the user's initial value. #
115
116 # sum.27_29 = PHI <sum.27_11, 0>
117
0eb7e7aa 118 sum.27_11 = D.1827_8 + sum.27_29;
ae0bce62 119
726a989a 120 GIMPLE_OMP_CONTINUE
a509ebb5 121
0eb7e7aa
RL
122 # Adding this reduction phi is done at create_phi_for_local_result() #
123 # sum.27_56 = PHI <sum.27_11, 0>
726a989a 124 GIMPLE_OMP_RETURN
b8698a0f
L
125
126 # Creating the atomic operation is done at
0eb7e7aa 127 create_call_for_reduction_1() #
a509ebb5 128
0eb7e7aa
RL
129 #pragma omp atomic_load
130 D.1839_59 = *&.paral_data_load.33_51->reduction.23;
131 D.1840_60 = sum.27_56 + D.1839_59;
132 #pragma omp atomic_store (D.1840_60);
b8698a0f 133
726a989a 134 GIMPLE_OMP_RETURN
b8698a0f 135
0eb7e7aa
RL
136 # collecting the result after the join of the threads is done at
137 create_loads_for_reductions().
ae0bce62
RL
138 The value computed by the threads is loaded from the
139 shared struct. #
140
b8698a0f 141
0eb7e7aa 142 .paral_data_load.33_52 = &.paral_data_store.32;
ae0bce62 143 sum_37 = .paral_data_load.33_52->sum.27;
0eb7e7aa
RL
144 sum_43 = D.1795_41 + sum_37;
145
146 exit bb:
147 # sum_21 = PHI <sum_43, sum_26>
148 printf (&"%d"[0], sum_21);
149
150...
151
a509ebb5
RL
152}
153
154*/
155
5f40b3cb
ZD
156/* Minimal number of iterations of a loop that should be executed in each
157 thread. */
158#define MIN_PER_THREAD 100
159
b8698a0f 160/* Element of the hashtable, representing a
a509ebb5
RL
161 reduction in the current loop. */
162struct reduction_info
163{
726a989a
RB
164 gimple reduc_stmt; /* reduction statement. */
165 gimple reduc_phi; /* The phi node defining the reduction. */
166 enum tree_code reduction_code;/* code for the reduction operation. */
5d1fd1de
JJ
167 unsigned reduc_version; /* SSA_NAME_VERSION of original reduc_phi
168 result. */
b8698a0f 169 gimple keep_res; /* The PHI_RESULT of this phi is the resulting value
a509ebb5 170 of the reduction variable when existing the loop. */
ae0bce62 171 tree initial_value; /* The initial value of the reduction var before entering the loop. */
a509ebb5 172 tree field; /* the name of the field in the parloop data structure intended for reduction. */
a509ebb5 173 tree init; /* reduction initialization value. */
b8698a0f 174 gimple new_phi; /* (helper field) Newly created phi node whose result
a509ebb5
RL
175 will be passed to the atomic operation. Represents
176 the local result each thread computed for the reduction
177 operation. */
178};
179
180/* Equality and hash functions for hashtab code. */
181
182static int
183reduction_info_eq (const void *aa, const void *bb)
184{
185 const struct reduction_info *a = (const struct reduction_info *) aa;
186 const struct reduction_info *b = (const struct reduction_info *) bb;
187
188 return (a->reduc_phi == b->reduc_phi);
189}
190
191static hashval_t
192reduction_info_hash (const void *aa)
193{
194 const struct reduction_info *a = (const struct reduction_info *) aa;
195
5d1fd1de 196 return a->reduc_version;
a509ebb5
RL
197}
198
199static struct reduction_info *
726a989a 200reduction_phi (htab_t reduction_list, gimple phi)
a509ebb5
RL
201{
202 struct reduction_info tmpred, *red;
203
87ebde38 204 if (htab_elements (reduction_list) == 0 || phi == NULL)
a509ebb5
RL
205 return NULL;
206
207 tmpred.reduc_phi = phi;
5d1fd1de 208 tmpred.reduc_version = gimple_uid (phi);
3d9a9f94 209 red = (struct reduction_info *) htab_find (reduction_list, &tmpred);
a509ebb5
RL
210
211 return red;
212}
213
5f40b3cb
ZD
214/* Element of hashtable of names to copy. */
215
216struct name_to_copy_elt
217{
218 unsigned version; /* The version of the name to copy. */
219 tree new_name; /* The new name used in the copy. */
220 tree field; /* The field of the structure used to pass the
221 value. */
222};
223
224/* Equality and hash functions for hashtab code. */
225
226static int
227name_to_copy_elt_eq (const void *aa, const void *bb)
228{
a509ebb5
RL
229 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
230 const struct name_to_copy_elt *b = (const struct name_to_copy_elt *) bb;
5f40b3cb
ZD
231
232 return a->version == b->version;
233}
234
235static hashval_t
236name_to_copy_elt_hash (const void *aa)
237{
a509ebb5 238 const struct name_to_copy_elt *a = (const struct name_to_copy_elt *) aa;
5f40b3cb
ZD
239
240 return (hashval_t) a->version;
241}
242
b305e3da
SP
243/* A transformation matrix, which is a self-contained ROWSIZE x COLSIZE
244 matrix. Rather than use floats, we simply keep a single DENOMINATOR that
245 represents the denominator for every element in the matrix. */
246typedef struct lambda_trans_matrix_s
247{
248 lambda_matrix matrix;
249 int rowsize;
250 int colsize;
251 int denominator;
252} *lambda_trans_matrix;
253#define LTM_MATRIX(T) ((T)->matrix)
254#define LTM_ROWSIZE(T) ((T)->rowsize)
255#define LTM_COLSIZE(T) ((T)->colsize)
256#define LTM_DENOMINATOR(T) ((T)->denominator)
257
258/* Allocate a new transformation matrix. */
259
260static lambda_trans_matrix
261lambda_trans_matrix_new (int colsize, int rowsize,
262 struct obstack * lambda_obstack)
263{
264 lambda_trans_matrix ret;
265
266 ret = (lambda_trans_matrix)
267 obstack_alloc (lambda_obstack, sizeof (struct lambda_trans_matrix_s));
268 LTM_MATRIX (ret) = lambda_matrix_new (rowsize, colsize, lambda_obstack);
269 LTM_ROWSIZE (ret) = rowsize;
270 LTM_COLSIZE (ret) = colsize;
271 LTM_DENOMINATOR (ret) = 1;
272 return ret;
273}
274
275/* Multiply a vector VEC by a matrix MAT.
276 MAT is an M*N matrix, and VEC is a vector with length N. The result
277 is stored in DEST which must be a vector of length M. */
278
279static void
280lambda_matrix_vector_mult (lambda_matrix matrix, int m, int n,
281 lambda_vector vec, lambda_vector dest)
282{
283 int i, j;
284
285 lambda_vector_clear (dest, m);
286 for (i = 0; i < m; i++)
287 for (j = 0; j < n; j++)
288 dest[i] += matrix[i][j] * vec[j];
289}
290
291/* Return true if TRANS is a legal transformation matrix that respects
292 the dependence vectors in DISTS and DIRS. The conservative answer
293 is false.
294
295 "Wolfe proves that a unimodular transformation represented by the
296 matrix T is legal when applied to a loop nest with a set of
297 lexicographically non-negative distance vectors RDG if and only if
298 for each vector d in RDG, (T.d >= 0) is lexicographically positive.
299 i.e.: if and only if it transforms the lexicographically positive
300 distance vectors to lexicographically positive vectors. Note that
301 a unimodular matrix must transform the zero vector (and only it) to
302 the zero vector." S.Muchnick. */
303
304static bool
305lambda_transform_legal_p (lambda_trans_matrix trans,
306 int nb_loops,
307 VEC (ddr_p, heap) *dependence_relations)
308{
309 unsigned int i, j;
310 lambda_vector distres;
311 struct data_dependence_relation *ddr;
312
313 gcc_assert (LTM_COLSIZE (trans) == nb_loops
314 && LTM_ROWSIZE (trans) == nb_loops);
315
316 /* When there are no dependences, the transformation is correct. */
317 if (VEC_length (ddr_p, dependence_relations) == 0)
318 return true;
319
320 ddr = VEC_index (ddr_p, dependence_relations, 0);
321 if (ddr == NULL)
322 return true;
323
324 /* When there is an unknown relation in the dependence_relations, we
325 know that it is no worth looking at this loop nest: give up. */
326 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
327 return false;
328
329 distres = lambda_vector_new (nb_loops);
330
331 /* For each distance vector in the dependence graph. */
332 FOR_EACH_VEC_ELT (ddr_p, dependence_relations, i, ddr)
333 {
334 /* Don't care about relations for which we know that there is no
335 dependence, nor about read-read (aka. output-dependences):
336 these data accesses can happen in any order. */
337 if (DDR_ARE_DEPENDENT (ddr) == chrec_known
338 || (DR_IS_READ (DDR_A (ddr)) && DR_IS_READ (DDR_B (ddr))))
339 continue;
340
341 /* Conservatively answer: "this transformation is not valid". */
342 if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
343 return false;
344
345 /* If the dependence could not be captured by a distance vector,
346 conservatively answer that the transform is not valid. */
347 if (DDR_NUM_DIST_VECTS (ddr) == 0)
348 return false;
349
350 /* Compute trans.dist_vect */
351 for (j = 0; j < DDR_NUM_DIST_VECTS (ddr); j++)
352 {
353 lambda_matrix_vector_mult (LTM_MATRIX (trans), nb_loops, nb_loops,
354 DDR_DIST_VECT (ddr, j), distres);
355
356 if (!lambda_vector_lexico_pos (distres, nb_loops))
357 return false;
358 }
359 }
360 return true;
361}
08dab97a
RL
362
363/* Data dependency analysis. Returns true if the iterations of LOOP
364 are independent on each other (that is, if we can execute them
365 in parallel). */
5f40b3cb
ZD
366
367static bool
f873b205 368loop_parallel_p (struct loop *loop, struct obstack * parloop_obstack)
5f40b3cb 369{
01be8516
SP
370 VEC (loop_p, heap) *loop_nest;
371 VEC (ddr_p, heap) *dependence_relations;
726a989a 372 VEC (data_reference_p, heap) *datarefs;
5f40b3cb
ZD
373 lambda_trans_matrix trans;
374 bool ret = false;
5f40b3cb
ZD
375
376 if (dump_file && (dump_flags & TDF_DETAILS))
48710229
RL
377 {
378 fprintf (dump_file, "Considering loop %d\n", loop->num);
379 if (!loop->inner)
380 fprintf (dump_file, "loop is innermost\n");
b8698a0f 381 else
48710229
RL
382 fprintf (dump_file, "loop NOT innermost\n");
383 }
5f40b3cb 384
5f40b3cb
ZD
385 /* Check for problems with dependences. If the loop can be reversed,
386 the iterations are independent. */
387 datarefs = VEC_alloc (data_reference_p, heap, 10);
388 dependence_relations = VEC_alloc (ddr_p, heap, 10 * 10);
01be8516 389 loop_nest = VEC_alloc (loop_p, heap, 3);
9ca3d00e
AB
390 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
391 &dependence_relations))
392 {
393 if (dump_file && (dump_flags & TDF_DETAILS))
394 fprintf (dump_file, " FAILED: cannot analyze data dependencies\n");
395 ret = false;
396 goto end;
397 }
5f40b3cb
ZD
398 if (dump_file && (dump_flags & TDF_DETAILS))
399 dump_data_dependence_relations (dump_file, dependence_relations);
400
f873b205 401 trans = lambda_trans_matrix_new (1, 1, parloop_obstack);
5f40b3cb
ZD
402 LTM_MATRIX (trans)[0][0] = -1;
403
404 if (lambda_transform_legal_p (trans, 1, dependence_relations))
405 {
406 ret = true;
407 if (dump_file && (dump_flags & TDF_DETAILS))
408 fprintf (dump_file, " SUCCESS: may be parallelized\n");
409 }
410 else if (dump_file && (dump_flags & TDF_DETAILS))
a509ebb5
RL
411 fprintf (dump_file,
412 " FAILED: data dependencies exist across iterations\n");
5f40b3cb 413
9ca3d00e 414 end:
01be8516 415 VEC_free (loop_p, heap, loop_nest);
5f40b3cb
ZD
416 free_dependence_relations (dependence_relations);
417 free_data_refs (datarefs);
418
419 return ret;
420}
421
1d4af1e8
SP
422/* Return true when LOOP contains basic blocks marked with the
423 BB_IRREDUCIBLE_LOOP flag. */
424
425static inline bool
426loop_has_blocks_with_irreducible_flag (struct loop *loop)
427{
428 unsigned i;
429 basic_block *bbs = get_loop_body_in_dom_order (loop);
430 bool res = true;
431
432 for (i = 0; i < loop->num_nodes; i++)
433 if (bbs[i]->flags & BB_IRREDUCIBLE_LOOP)
434 goto end;
435
436 res = false;
437 end:
438 free (bbs);
439 return res;
440}
441
8a171a59 442/* Assigns the address of OBJ in TYPE to an ssa name, and returns this name.
9f9f72aa 443 The assignment statement is placed on edge ENTRY. DECL_ADDRESS maps decls
8a171a59 444 to their addresses that can be reused. The address of OBJ is known to
cba1eb61
JJ
445 be invariant in the whole function. Other needed statements are placed
446 right before GSI. */
5f40b3cb
ZD
447
448static tree
cba1eb61
JJ
449take_address_of (tree obj, tree type, edge entry, htab_t decl_address,
450 gimple_stmt_iterator *gsi)
5f40b3cb 451{
8a171a59 452 int uid;
5f40b3cb
ZD
453 void **dslot;
454 struct int_tree_map ielt, *nielt;
726a989a
RB
455 tree *var_p, name, bvar, addr;
456 gimple stmt;
457 gimple_seq stmts;
5f40b3cb 458
8a171a59
ZD
459 /* Since the address of OBJ is invariant, the trees may be shared.
460 Avoid rewriting unrelated parts of the code. */
461 obj = unshare_expr (obj);
462 for (var_p = &obj;
463 handled_component_p (*var_p);
464 var_p = &TREE_OPERAND (*var_p, 0))
465 continue;
8a171a59 466
c9a410f0
RG
467 /* Canonicalize the access to base on a MEM_REF. */
468 if (DECL_P (*var_p))
469 *var_p = build_simple_mem_ref (build_fold_addr_expr (*var_p));
470
471 /* Assign a canonical SSA name to the address of the base decl used
472 in the address and share it for all accesses and addresses based
473 on it. */
474 uid = DECL_UID (TREE_OPERAND (TREE_OPERAND (*var_p, 0), 0));
5f40b3cb
ZD
475 ielt.uid = uid;
476 dslot = htab_find_slot_with_hash (decl_address, &ielt, uid, INSERT);
477 if (!*dslot)
478 {
cba1eb61
JJ
479 if (gsi == NULL)
480 return NULL;
c9a410f0
RG
481 addr = TREE_OPERAND (*var_p, 0);
482 bvar = create_tmp_var (TREE_TYPE (addr),
483 get_name (TREE_OPERAND
484 (TREE_OPERAND (*var_p, 0), 0)));
5f40b3cb 485 add_referenced_var (bvar);
726a989a 486 stmt = gimple_build_assign (bvar, addr);
5f40b3cb 487 name = make_ssa_name (bvar, stmt);
726a989a
RB
488 gimple_assign_set_lhs (stmt, name);
489 gsi_insert_on_edge_immediate (entry, stmt);
5f40b3cb
ZD
490
491 nielt = XNEW (struct int_tree_map);
492 nielt->uid = uid;
493 nielt->to = name;
494 *dslot = nielt;
5f40b3cb 495 }
8a171a59
ZD
496 else
497 name = ((struct int_tree_map *) *dslot)->to;
5f40b3cb 498
c9a410f0
RG
499 /* Express the address in terms of the canonical SSA name. */
500 TREE_OPERAND (*var_p, 0) = name;
cba1eb61
JJ
501 if (gsi == NULL)
502 return build_fold_addr_expr_with_type (obj, type);
503
c9a410f0
RG
504 name = force_gimple_operand (build_addr (obj, current_function_decl),
505 &stmts, true, NULL_TREE);
506 if (!gimple_seq_empty_p (stmts))
cba1eb61 507 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
5f40b3cb 508
c9a410f0 509 if (!useless_type_conversion_p (type, TREE_TYPE (name)))
8a171a59 510 {
726a989a 511 name = force_gimple_operand (fold_convert (type, name), &stmts, true,
8a171a59 512 NULL_TREE);
726a989a 513 if (!gimple_seq_empty_p (stmts))
cba1eb61 514 gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
8a171a59 515 }
5f40b3cb
ZD
516
517 return name;
518}
519
a509ebb5 520/* Callback for htab_traverse. Create the initialization statement
b8698a0f 521 for reduction described in SLOT, and place it at the preheader of
a509ebb5
RL
522 the loop described in DATA. */
523
524static int
525initialize_reductions (void **slot, void *data)
526{
a509ebb5 527 tree init, c;
a509ebb5
RL
528 tree bvar, type, arg;
529 edge e;
530
3d9a9f94 531 struct reduction_info *const reduc = (struct reduction_info *) *slot;
a509ebb5
RL
532 struct loop *loop = (struct loop *) data;
533
b8698a0f 534 /* Create initialization in preheader:
a509ebb5
RL
535 reduction_variable = initialization value of reduction. */
536
b8698a0f 537 /* In the phi node at the header, replace the argument coming
a509ebb5
RL
538 from the preheader with the reduction initialization value. */
539
540 /* Create a new variable to initialize the reduction. */
541 type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
542 bvar = create_tmp_var (type, "reduction");
543 add_referenced_var (bvar);
544
c2255bc4
AH
545 c = build_omp_clause (gimple_location (reduc->reduc_stmt),
546 OMP_CLAUSE_REDUCTION);
a509ebb5 547 OMP_CLAUSE_REDUCTION_CODE (c) = reduc->reduction_code;
726a989a 548 OMP_CLAUSE_DECL (c) = SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt));
a509ebb5
RL
549
550 init = omp_reduction_init (c, TREE_TYPE (bvar));
551 reduc->init = init;
552
b8698a0f
L
553 /* Replace the argument representing the initialization value
554 with the initialization value for the reduction (neutral
555 element for the particular operation, e.g. 0 for PLUS_EXPR,
556 1 for MULT_EXPR, etc).
557 Keep the old value in a new variable "reduction_initial",
558 that will be taken in consideration after the parallel
0eb7e7aa 559 computing is done. */
a509ebb5
RL
560
561 e = loop_preheader_edge (loop);
562 arg = PHI_ARG_DEF_FROM_EDGE (reduc->reduc_phi, e);
563 /* Create new variable to hold the initial value. */
a509ebb5 564
a509ebb5 565 SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE
0eb7e7aa 566 (reduc->reduc_phi, loop_preheader_edge (loop)), init);
ae0bce62 567 reduc->initial_value = arg;
a509ebb5
RL
568 return 1;
569}
5f40b3cb
ZD
570
571struct elv_data
572{
726a989a 573 struct walk_stmt_info info;
9f9f72aa 574 edge entry;
5f40b3cb 575 htab_t decl_address;
cba1eb61 576 gimple_stmt_iterator *gsi;
5f40b3cb 577 bool changed;
cba1eb61 578 bool reset;
5f40b3cb
ZD
579};
580
9f9f72aa
AP
581/* Eliminates references to local variables in *TP out of the single
582 entry single exit region starting at DTA->ENTRY.
583 DECL_ADDRESS contains addresses of the references that had their
584 address taken already. If the expression is changed, CHANGED is
585 set to true. Callback for walk_tree. */
a509ebb5 586
5f40b3cb 587static tree
8a171a59 588eliminate_local_variables_1 (tree *tp, int *walk_subtrees, void *data)
5f40b3cb 589{
3d9a9f94 590 struct elv_data *const dta = (struct elv_data *) data;
8a171a59 591 tree t = *tp, var, addr, addr_type, type, obj;
5f40b3cb
ZD
592
593 if (DECL_P (t))
594 {
595 *walk_subtrees = 0;
596
597 if (!SSA_VAR_P (t) || DECL_EXTERNAL (t))
598 return NULL_TREE;
599
600 type = TREE_TYPE (t);
601 addr_type = build_pointer_type (type);
cba1eb61
JJ
602 addr = take_address_of (t, addr_type, dta->entry, dta->decl_address,
603 dta->gsi);
604 if (dta->gsi == NULL && addr == NULL_TREE)
605 {
606 dta->reset = true;
607 return NULL_TREE;
608 }
609
70f34814 610 *tp = build_simple_mem_ref (addr);
5f40b3cb
ZD
611
612 dta->changed = true;
613 return NULL_TREE;
614 }
615
616 if (TREE_CODE (t) == ADDR_EXPR)
617 {
8a171a59
ZD
618 /* ADDR_EXPR may appear in two contexts:
619 -- as a gimple operand, when the address taken is a function invariant
620 -- as gimple rhs, when the resulting address in not a function
621 invariant
622 We do not need to do anything special in the latter case (the base of
623 the memory reference whose address is taken may be replaced in the
624 DECL_P case). The former case is more complicated, as we need to
625 ensure that the new address is still a gimple operand. Thus, it
626 is not sufficient to replace just the base of the memory reference --
627 we need to move the whole computation of the address out of the
628 loop. */
629 if (!is_gimple_val (t))
5f40b3cb
ZD
630 return NULL_TREE;
631
632 *walk_subtrees = 0;
8a171a59
ZD
633 obj = TREE_OPERAND (t, 0);
634 var = get_base_address (obj);
635 if (!var || !SSA_VAR_P (var) || DECL_EXTERNAL (var))
5f40b3cb
ZD
636 return NULL_TREE;
637
638 addr_type = TREE_TYPE (t);
cba1eb61
JJ
639 addr = take_address_of (obj, addr_type, dta->entry, dta->decl_address,
640 dta->gsi);
641 if (dta->gsi == NULL && addr == NULL_TREE)
642 {
643 dta->reset = true;
644 return NULL_TREE;
645 }
5f40b3cb
ZD
646 *tp = addr;
647
648 dta->changed = true;
649 return NULL_TREE;
650 }
651
726a989a 652 if (!EXPR_P (t))
5f40b3cb
ZD
653 *walk_subtrees = 0;
654
655 return NULL_TREE;
656}
657
cba1eb61 658/* Moves the references to local variables in STMT at *GSI out of the single
9f9f72aa
AP
659 entry single exit region starting at ENTRY. DECL_ADDRESS contains
660 addresses of the references that had their address taken
661 already. */
5f40b3cb
ZD
662
663static void
cba1eb61 664eliminate_local_variables_stmt (edge entry, gimple_stmt_iterator *gsi,
5f40b3cb
ZD
665 htab_t decl_address)
666{
667 struct elv_data dta;
cba1eb61 668 gimple stmt = gsi_stmt (*gsi);
5f40b3cb 669
726a989a 670 memset (&dta.info, '\0', sizeof (dta.info));
9f9f72aa 671 dta.entry = entry;
5f40b3cb
ZD
672 dta.decl_address = decl_address;
673 dta.changed = false;
cba1eb61 674 dta.reset = false;
5f40b3cb 675
b5b8b0ac 676 if (gimple_debug_bind_p (stmt))
cba1eb61
JJ
677 {
678 dta.gsi = NULL;
679 walk_tree (gimple_debug_bind_get_value_ptr (stmt),
680 eliminate_local_variables_1, &dta.info, NULL);
681 if (dta.reset)
682 {
683 gimple_debug_bind_reset_value (stmt);
684 dta.changed = true;
685 }
686 }
b5b8b0ac 687 else
cba1eb61
JJ
688 {
689 dta.gsi = gsi;
690 walk_gimple_op (stmt, eliminate_local_variables_1, &dta.info);
691 }
5f40b3cb
ZD
692
693 if (dta.changed)
694 update_stmt (stmt);
695}
696
9f9f72aa
AP
697/* Eliminates the references to local variables from the single entry
698 single exit region between the ENTRY and EXIT edges.
b8698a0f 699
a509ebb5 700 This includes:
b8698a0f
L
701 1) Taking address of a local variable -- these are moved out of the
702 region (and temporary variable is created to hold the address if
a509ebb5 703 necessary).
9f9f72aa 704
5f40b3cb 705 2) Dereferencing a local variable -- these are replaced with indirect
a509ebb5 706 references. */
5f40b3cb
ZD
707
708static void
9f9f72aa 709eliminate_local_variables (edge entry, edge exit)
5f40b3cb 710{
9f9f72aa
AP
711 basic_block bb;
712 VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
5f40b3cb 713 unsigned i;
726a989a 714 gimple_stmt_iterator gsi;
cba1eb61 715 bool has_debug_stmt = false;
5f40b3cb
ZD
716 htab_t decl_address = htab_create (10, int_tree_map_hash, int_tree_map_eq,
717 free);
9f9f72aa
AP
718 basic_block entry_bb = entry->src;
719 basic_block exit_bb = exit->dest;
5f40b3cb 720
9f9f72aa 721 gather_blocks_in_sese_region (entry_bb, exit_bb, &body);
5f40b3cb 722
ac47786e 723 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
9f9f72aa 724 if (bb != entry_bb && bb != exit_bb)
726a989a 725 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
ddb555ed
JJ
726 if (is_gimple_debug (gsi_stmt (gsi)))
727 {
728 if (gimple_debug_bind_p (gsi_stmt (gsi)))
729 has_debug_stmt = true;
730 }
cba1eb61
JJ
731 else
732 eliminate_local_variables_stmt (entry, &gsi, decl_address);
733
734 if (has_debug_stmt)
735 FOR_EACH_VEC_ELT (basic_block, body, i, bb)
736 if (bb != entry_bb && bb != exit_bb)
737 for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
738 if (gimple_debug_bind_p (gsi_stmt (gsi)))
739 eliminate_local_variables_stmt (entry, &gsi, decl_address);
5f40b3cb
ZD
740
741 htab_delete (decl_address);
9f9f72aa
AP
742 VEC_free (basic_block, heap, body);
743}
744
745/* Returns true if expression EXPR is not defined between ENTRY and
746 EXIT, i.e. if all its operands are defined outside of the region. */
747
748static bool
749expr_invariant_in_region_p (edge entry, edge exit, tree expr)
750{
751 basic_block entry_bb = entry->src;
752 basic_block exit_bb = exit->dest;
753 basic_block def_bb;
9f9f72aa
AP
754
755 if (is_gimple_min_invariant (expr))
756 return true;
757
758 if (TREE_CODE (expr) == SSA_NAME)
759 {
726a989a 760 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
9f9f72aa
AP
761 if (def_bb
762 && dominated_by_p (CDI_DOMINATORS, def_bb, entry_bb)
763 && !dominated_by_p (CDI_DOMINATORS, def_bb, exit_bb))
764 return false;
765
766 return true;
767 }
768
726a989a 769 return false;
5f40b3cb
ZD
770}
771
772/* If COPY_NAME_P is true, creates and returns a duplicate of NAME.
773 The copies are stored to NAME_COPIES, if NAME was already duplicated,
774 its duplicate stored in NAME_COPIES is returned.
b8698a0f 775
5f40b3cb
ZD
776 Regardless of COPY_NAME_P, the decl used as a base of the ssa name is also
777 duplicated, storing the copies in DECL_COPIES. */
778
779static tree
9f9f72aa
AP
780separate_decls_in_region_name (tree name,
781 htab_t name_copies, htab_t decl_copies,
782 bool copy_name_p)
5f40b3cb
ZD
783{
784 tree copy, var, var_copy;
785 unsigned idx, uid, nuid;
786 struct int_tree_map ielt, *nielt;
787 struct name_to_copy_elt elt, *nelt;
788 void **slot, **dslot;
789
790 if (TREE_CODE (name) != SSA_NAME)
791 return name;
792
793 idx = SSA_NAME_VERSION (name);
794 elt.version = idx;
795 slot = htab_find_slot_with_hash (name_copies, &elt, idx,
796 copy_name_p ? INSERT : NO_INSERT);
797 if (slot && *slot)
798 return ((struct name_to_copy_elt *) *slot)->new_name;
799
800 var = SSA_NAME_VAR (name);
801 uid = DECL_UID (var);
802 ielt.uid = uid;
803 dslot = htab_find_slot_with_hash (decl_copies, &ielt, uid, INSERT);
804 if (!*dslot)
805 {
806 var_copy = create_tmp_var (TREE_TYPE (var), get_name (var));
36ad7922 807 DECL_GIMPLE_REG_P (var_copy) = DECL_GIMPLE_REG_P (var);
5f40b3cb
ZD
808 add_referenced_var (var_copy);
809 nielt = XNEW (struct int_tree_map);
810 nielt->uid = uid;
811 nielt->to = var_copy;
812 *dslot = nielt;
813
814 /* Ensure that when we meet this decl next time, we won't duplicate
a509ebb5 815 it again. */
5f40b3cb
ZD
816 nuid = DECL_UID (var_copy);
817 ielt.uid = nuid;
818 dslot = htab_find_slot_with_hash (decl_copies, &ielt, nuid, INSERT);
819 gcc_assert (!*dslot);
820 nielt = XNEW (struct int_tree_map);
821 nielt->uid = nuid;
822 nielt->to = var_copy;
823 *dslot = nielt;
824 }
825 else
826 var_copy = ((struct int_tree_map *) *dslot)->to;
827
828 if (copy_name_p)
829 {
726a989a 830 copy = duplicate_ssa_name (name, NULL);
5f40b3cb
ZD
831 nelt = XNEW (struct name_to_copy_elt);
832 nelt->version = idx;
833 nelt->new_name = copy;
834 nelt->field = NULL_TREE;
835 *slot = nelt;
836 }
837 else
838 {
839 gcc_assert (!slot);
840 copy = name;
841 }
842
843 SSA_NAME_VAR (copy) = var_copy;
844 return copy;
845}
846
9f9f72aa
AP
847/* Finds the ssa names used in STMT that are defined outside the
848 region between ENTRY and EXIT and replaces such ssa names with
849 their duplicates. The duplicates are stored to NAME_COPIES. Base
850 decls of all ssa names used in STMT (including those defined in
851 LOOP) are replaced with the new temporary variables; the
852 replacement decls are stored in DECL_COPIES. */
5f40b3cb
ZD
853
854static void
726a989a 855separate_decls_in_region_stmt (edge entry, edge exit, gimple stmt,
9f9f72aa 856 htab_t name_copies, htab_t decl_copies)
5f40b3cb
ZD
857{
858 use_operand_p use;
859 def_operand_p def;
860 ssa_op_iter oi;
861 tree name, copy;
862 bool copy_name_p;
863
864 mark_virtual_ops_for_renaming (stmt);
865
866 FOR_EACH_PHI_OR_STMT_DEF (def, stmt, oi, SSA_OP_DEF)
a509ebb5
RL
867 {
868 name = DEF_FROM_PTR (def);
869 gcc_assert (TREE_CODE (name) == SSA_NAME);
9f9f72aa
AP
870 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
871 false);
a509ebb5
RL
872 gcc_assert (copy == name);
873 }
5f40b3cb
ZD
874
875 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
a509ebb5
RL
876 {
877 name = USE_FROM_PTR (use);
878 if (TREE_CODE (name) != SSA_NAME)
879 continue;
880
9f9f72aa
AP
881 copy_name_p = expr_invariant_in_region_p (entry, exit, name);
882 copy = separate_decls_in_region_name (name, name_copies, decl_copies,
883 copy_name_p);
a509ebb5
RL
884 SET_USE (use, copy);
885 }
5f40b3cb
ZD
886}
887
b5b8b0ac
AO
888/* Finds the ssa names used in STMT that are defined outside the
889 region between ENTRY and EXIT and replaces such ssa names with
890 their duplicates. The duplicates are stored to NAME_COPIES. Base
891 decls of all ssa names used in STMT (including those defined in
892 LOOP) are replaced with the new temporary variables; the
893 replacement decls are stored in DECL_COPIES. */
894
895static bool
ddb555ed
JJ
896separate_decls_in_region_debug (gimple stmt, htab_t name_copies,
897 htab_t decl_copies)
b5b8b0ac
AO
898{
899 use_operand_p use;
900 ssa_op_iter oi;
901 tree var, name;
902 struct int_tree_map ielt;
903 struct name_to_copy_elt elt;
904 void **slot, **dslot;
905
ddb555ed
JJ
906 if (gimple_debug_bind_p (stmt))
907 var = gimple_debug_bind_get_var (stmt);
908 else if (gimple_debug_source_bind_p (stmt))
909 var = gimple_debug_source_bind_get_var (stmt);
910 else
911 return true;
598e67d7 912 if (TREE_CODE (var) == DEBUG_EXPR_DECL || TREE_CODE (var) == LABEL_DECL)
4f2a9af8 913 return true;
b5b8b0ac
AO
914 gcc_assert (DECL_P (var) && SSA_VAR_P (var));
915 ielt.uid = DECL_UID (var);
916 dslot = htab_find_slot_with_hash (decl_copies, &ielt, ielt.uid, NO_INSERT);
917 if (!dslot)
918 return true;
ddb555ed
JJ
919 if (gimple_debug_bind_p (stmt))
920 gimple_debug_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
921 else if (gimple_debug_source_bind_p (stmt))
922 gimple_debug_source_bind_set_var (stmt, ((struct int_tree_map *) *dslot)->to);
b5b8b0ac
AO
923
924 FOR_EACH_PHI_OR_STMT_USE (use, stmt, oi, SSA_OP_USE)
925 {
926 name = USE_FROM_PTR (use);
927 if (TREE_CODE (name) != SSA_NAME)
928 continue;
929
930 elt.version = SSA_NAME_VERSION (name);
931 slot = htab_find_slot_with_hash (name_copies, &elt, elt.version, NO_INSERT);
932 if (!slot)
933 {
934 gimple_debug_bind_reset_value (stmt);
935 update_stmt (stmt);
936 break;
937 }
938
939 SET_USE (use, ((struct name_to_copy_elt *) *slot)->new_name);
940 }
941
942 return false;
943}
944
0eb7e7aa
RL
945/* Callback for htab_traverse. Adds a field corresponding to the reduction
946 specified in SLOT. The type is passed in DATA. */
947
948static int
949add_field_for_reduction (void **slot, void *data)
a509ebb5 950{
b8698a0f 951
3d9a9f94
KG
952 struct reduction_info *const red = (struct reduction_info *) *slot;
953 tree const type = (tree) data;
726a989a 954 tree var = SSA_NAME_VAR (gimple_assign_lhs (red->reduc_stmt));
c2255bc4
AH
955 tree field = build_decl (gimple_location (red->reduc_stmt),
956 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
0eb7e7aa
RL
957
958 insert_field_into_struct (type, field);
959
960 red->field = field;
961
962 return 1;
963}
a509ebb5 964
5f40b3cb 965/* Callback for htab_traverse. Adds a field corresponding to a ssa name
b8698a0f 966 described in SLOT. The type is passed in DATA. */
5f40b3cb
ZD
967
968static int
969add_field_for_name (void **slot, void *data)
970{
3d9a9f94
KG
971 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
972 tree type = (tree) data;
5f40b3cb
ZD
973 tree name = ssa_name (elt->version);
974 tree var = SSA_NAME_VAR (name);
c2255bc4
AH
975 tree field = build_decl (DECL_SOURCE_LOCATION (var),
976 FIELD_DECL, DECL_NAME (var), TREE_TYPE (var));
5f40b3cb
ZD
977
978 insert_field_into_struct (type, field);
979 elt->field = field;
a509ebb5 980
5f40b3cb
ZD
981 return 1;
982}
983
b8698a0f
L
/* Callback for htab_traverse.  A local result is the intermediate result
   computed by a single thread, or the initial value in case no iteration
   was executed.  This function creates a phi node reflecting these
   values.  The phi's result will be stored in the NEW_PHI field of the
   reduction's data structure (SLOT); DATA is the loop being
   parallelized.  Always returns 1 so the traversal continues.  */

static int
create_phi_for_local_result (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  const struct loop *const loop = (const struct loop *) data;
  edge e;
  gimple new_phi;
  basic_block store_bb;
  tree local_res;
  source_location locus;
  tree block;

  /* STORE_BB is the block where the phi
     should be stored.  It is the destination of the loop exit.
     (Find the fallthru edge from GIMPLE_OMP_CONTINUE).  */
  store_bb = FALLTHRU_EDGE (loop->latch)->dest;

  /* STORE_BB has two predecessors.  One coming from the loop
     (the reduction's result is computed at the loop),
     and another coming from a block preceding the loop,
     when no iterations
     are executed (the initial value should be taken).  */
  if (EDGE_PRED (store_bb, 0) == FALLTHRU_EDGE (loop->latch))
    e = EDGE_PRED (store_bb, 1);
  else
    e = EDGE_PRED (store_bb, 0);
  /* The new name reuses the variable underlying the reduction's
     result, so the phi merges values of the same variable.  */
  local_res
    = make_ssa_name (SSA_NAME_VAR (gimple_assign_lhs (reduc->reduc_stmt)),
		     NULL);
  /* Reuse location/block info of the reduction statement for the new
     phi arguments.  */
  locus = gimple_location (reduc->reduc_stmt);
  block = gimple_block (reduc->reduc_stmt);
  new_phi = create_phi_node (local_res, store_bb);
  SSA_NAME_DEF_STMT (local_res) = new_phi;
  /* Initial value on the edge bypassing the loop, loop-computed value
     on the fallthru edge from the latch.  */
  add_phi_arg (new_phi, reduc->init, e, locus, block);
  add_phi_arg (new_phi, gimple_assign_lhs (reduc->reduc_stmt),
	       FALLTHRU_EDGE (loop->latch), locus, block);
  reduc->new_phi = new_phi;

  return 1;
}
5f40b3cb
ZD
1031
/* Bookkeeping for the loads and stores that communicate data between
   the threads (see separate_decls_in_region, which fills it in).  */

struct clsn_data
{
  tree store;		/* The structure variable written before the
			   parallel region.  */
  tree load;		/* The pointer through which the structure is
			   reloaded inside the region.  */

  basic_block store_bb;	/* Block where the stores are emitted.  */
  basic_block load_bb;	/* Block where the loads are emitted.  */
};
1040
/* Callback for htab_traverse.  Create an atomic instruction for the
   reduction described in SLOT.
   DATA annotates the place in memory the atomic operation relates to,
   and the basic block it needs to be generated in.  Always returns 1
   so the traversal continues.  */

static int
create_call_for_reduction_1 (void **slot, void *data)
{
  struct reduction_info *const reduc = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (PHI_RESULT (reduc->reduc_phi));
  tree load_struct;
  basic_block bb;
  basic_block new_bb;
  edge e;
  tree t, addr, ref, x;
  tree tmp_load, name;
  gimple load;

  /* Address of this reduction's field within the shared structure --
     the memory location the atomic update operates on.  */
  load_struct = build_simple_mem_ref (clsn_data->load);
  t = build3 (COMPONENT_REF, type, load_struct, reduc->field, NULL_TREE);

  addr = build_addr (t, current_function_decl);

  /* Create phi node.  */
  bb = clsn_data->load_bb;

  e = split_block (bb, t);
  new_bb = e->dest;

  /* Atomically load the current field value into TMP_LOAD.  */
  tmp_load = create_tmp_var (TREE_TYPE (TREE_TYPE (addr)), NULL);
  add_referenced_var (tmp_load);
  tmp_load = make_ssa_name (tmp_load, NULL);
  load = gimple_build_omp_atomic_load (tmp_load, addr);
  SSA_NAME_DEF_STMT (tmp_load) = load;
  gsi = gsi_start_bb (new_bb);
  gsi_insert_after (&gsi, load, GSI_NEW_STMT);

  /* Each OMP atomic statement ends its block, so the combining
     operation and the store go into a further split block.  */
  e = split_block (new_bb, load);
  new_bb = e->dest;
  gsi = gsi_start_bb (new_bb);
  ref = tmp_load;
  /* Combine the loaded value with this thread's local result using the
     reduction's operation.  */
  x = fold_build2 (reduc->reduction_code,
		   TREE_TYPE (PHI_RESULT (reduc->new_phi)), ref,
		   PHI_RESULT (reduc->new_phi));

  name = force_gimple_operand_gsi (&gsi, x, true, NULL_TREE, true,
				   GSI_CONTINUE_LINKING);

  /* Atomically store the combined value back.  */
  gsi_insert_after (&gsi, gimple_build_omp_atomic_store (name), GSI_NEW_STMT);
  return 1;
}
1094
b8698a0f
L
1095/* Create the atomic operation at the join point of the threads.
1096 REDUCTION_LIST describes the reductions in the LOOP.
1097 LD_ST_DATA describes the shared data structure where
a509ebb5
RL
1098 shared data is stored in and loaded from. */
1099static void
b8698a0f 1100create_call_for_reduction (struct loop *loop, htab_t reduction_list,
a509ebb5
RL
1101 struct clsn_data *ld_st_data)
1102{
1103 htab_traverse (reduction_list, create_phi_for_local_result, loop);
726a989a 1104 /* Find the fallthru edge from GIMPLE_OMP_CONTINUE. */
a509ebb5
RL
1105 ld_st_data->load_bb = FALLTHRU_EDGE (loop->latch)->dest;
1106 htab_traverse (reduction_list, create_call_for_reduction_1, ld_st_data);
1107}
1108
ae0bce62
RL
/* Callback for htab_traverse.  Loads the final reduction value at the
   join point of all threads, and inserts it in the right place,
   replacing the phi that kept the exit value.  Returns 1 to continue
   the traversal.  */

static int
create_loads_for_reductions (void **slot, void *data)
{
  struct reduction_info *const red = (struct reduction_info *) *slot;
  struct clsn_data *const clsn_data = (struct clsn_data *) data;
  gimple stmt;
  gimple_stmt_iterator gsi;
  tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
  tree load_struct;
  tree name;
  tree x;

  /* Build a load of this reduction's field from the shared
     structure.  */
  gsi = gsi_after_labels (clsn_data->load_bb);
  load_struct = build_simple_mem_ref (clsn_data->load);
  load_struct = build3 (COMPONENT_REF, type, load_struct, red->field,
			NULL_TREE);

  /* Assign it to the name formerly defined by the kept exit phi.  */
  x = load_struct;
  name = PHI_RESULT (red->keep_res);
  stmt = gimple_build_assign (name, x);
  SSA_NAME_DEF_STMT (name) = stmt;

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* The phi is now superseded by the load; find and remove it.  */
  for (gsi = gsi_start_phis (gimple_bb (red->keep_res));
       !gsi_end_p (gsi); gsi_next (&gsi))
    if (gsi_stmt (gsi) == red->keep_res)
      {
	remove_phi_node (&gsi, false);
	return 1;
      }
  gcc_unreachable ();
}
1145
b8698a0f 1146/* Load the reduction result that was stored in LD_ST_DATA.
a509ebb5 1147 REDUCTION_LIST describes the list of reductions that the
fa10beec 1148 loads should be generated for. */
a509ebb5 1149static void
b8698a0f 1150create_final_loads_for_reduction (htab_t reduction_list,
a509ebb5
RL
1151 struct clsn_data *ld_st_data)
1152{
726a989a 1153 gimple_stmt_iterator gsi;
a509ebb5 1154 tree t;
726a989a 1155 gimple stmt;
a509ebb5 1156
726a989a 1157 gsi = gsi_after_labels (ld_st_data->load_bb);
a509ebb5 1158 t = build_fold_addr_expr (ld_st_data->store);
726a989a 1159 stmt = gimple_build_assign (ld_st_data->load, t);
a509ebb5 1160
726a989a
RB
1161 gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
1162 SSA_NAME_DEF_STMT (ld_st_data->load) = stmt;
a509ebb5
RL
1163
1164 htab_traverse (reduction_list, create_loads_for_reductions, ld_st_data);
1165
1166}
1167
0eb7e7aa
RL
1168/* Callback for htab_traverse. Store the neutral value for the
1169 particular reduction's operation, e.g. 0 for PLUS_EXPR,
1170 1 for MULT_EXPR, etc. into the reduction field.
b8698a0f
L
1171 The reduction is specified in SLOT. The store information is
1172 passed in DATA. */
0eb7e7aa
RL
1173
1174static int
1175create_stores_for_reduction (void **slot, void *data)
1176{
3d9a9f94
KG
1177 struct reduction_info *const red = (struct reduction_info *) *slot;
1178 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a
RB
1179 tree t;
1180 gimple stmt;
1181 gimple_stmt_iterator gsi;
1182 tree type = TREE_TYPE (gimple_assign_lhs (red->reduc_stmt));
1183
1184 gsi = gsi_last_bb (clsn_data->store_bb);
1185 t = build3 (COMPONENT_REF, type, clsn_data->store, red->field, NULL_TREE);
1186 stmt = gimple_build_assign (t, red->initial_value);
0eb7e7aa 1187 mark_virtual_ops_for_renaming (stmt);
726a989a 1188 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
0eb7e7aa
RL
1189
1190 return 1;
1191}
1192
a509ebb5
RL
1193/* Callback for htab_traverse. Creates loads to a field of LOAD in LOAD_BB and
1194 store to a field of STORE in STORE_BB for the ssa name and its duplicate
1195 specified in SLOT. */
1196
5f40b3cb
ZD
1197static int
1198create_loads_and_stores_for_name (void **slot, void *data)
1199{
3d9a9f94
KG
1200 struct name_to_copy_elt *const elt = (struct name_to_copy_elt *) *slot;
1201 struct clsn_data *const clsn_data = (struct clsn_data *) data;
726a989a
RB
1202 tree t;
1203 gimple stmt;
1204 gimple_stmt_iterator gsi;
5f40b3cb 1205 tree type = TREE_TYPE (elt->new_name);
5f40b3cb
ZD
1206 tree load_struct;
1207
726a989a
RB
1208 gsi = gsi_last_bb (clsn_data->store_bb);
1209 t = build3 (COMPONENT_REF, type, clsn_data->store, elt->field, NULL_TREE);
1210 stmt = gimple_build_assign (t, ssa_name (elt->version));
5f40b3cb 1211 mark_virtual_ops_for_renaming (stmt);
726a989a 1212 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb 1213
726a989a 1214 gsi = gsi_last_bb (clsn_data->load_bb);
70f34814 1215 load_struct = build_simple_mem_ref (clsn_data->load);
726a989a
RB
1216 t = build3 (COMPONENT_REF, type, load_struct, elt->field, NULL_TREE);
1217 stmt = gimple_build_assign (elt->new_name, t);
5f40b3cb 1218 SSA_NAME_DEF_STMT (elt->new_name) = stmt;
726a989a 1219 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
5f40b3cb
ZD
1220
1221 return 1;
1222}
1223
/* Moves all the variables used in LOOP and defined outside of it (including
   the initial values of loop phi nodes, and *PER_THREAD if it is a ssa
   name) to a structure created for this purpose.  The code

   while (1)
     {
       use (a);
       use (b);
     }

   is transformed this way:

   bb0:
   old.a = a;
   old.b = b;

   bb1:
   a' = new->a;
   b' = new->b;
   while (1)
     {
       use (a');
       use (b');
     }

   `old' is stored to *ARG_STRUCT and `new' is stored to NEW_ARG_STRUCT.  The
   pointer `new' is intentionally not initialized (the loop will be split to a
   separate function later, and `new' will be initialized from its arguments).
   LD_ST_DATA holds information about the shared data structure used to pass
   information among the threads.  It is initialized here, and
   gen_parallel_loop will pass it to create_call_for_reduction that
   needs this information.  REDUCTION_LIST describes the reductions
   in LOOP.  */

static void
separate_decls_in_region (edge entry, edge exit, htab_t reduction_list,
			  tree *arg_struct, tree *new_arg_struct,
			  struct clsn_data *ld_st_data)

{
  /* BB0 (before the region) holds the stores, BB1 (start of the
     region) the loads.  */
  basic_block bb1 = split_edge (entry);
  basic_block bb0 = single_pred (bb1);
  htab_t name_copies = htab_create (10, name_to_copy_elt_hash,
				    name_to_copy_elt_eq, free);
  htab_t decl_copies = htab_create (10, int_tree_map_hash, int_tree_map_eq,
				    free);
  unsigned i;
  tree type, type_name, nvar;
  gimple_stmt_iterator gsi;
  struct clsn_data clsn_data;
  VEC (basic_block, heap) *body = VEC_alloc (basic_block, heap, 3);
  basic_block bb;
  basic_block entry_bb = bb1;
  basic_block exit_bb = exit->dest;
  bool has_debug_stmt = false;

  entry = single_succ_edge (entry_bb);
  gather_blocks_in_sese_region (entry_bb, exit_bb, &body);

  /* First pass: duplicate decls/names used by the real statements of
     the region; debug statements are only noted for now.  */
  FOR_EACH_VEC_ELT (basic_block, body, i, bb)
    {
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_phis (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    separate_decls_in_region_stmt (entry, exit, gsi_stmt (gsi),
					   name_copies, decl_copies);

	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi); gsi_next (&gsi))
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		has_debug_stmt = true;
	      else
		separate_decls_in_region_stmt (entry, exit, stmt,
					       name_copies, decl_copies);
	    }
	}
    }

  /* Now process debug bind stmts.  We must not create decls while
     processing debug stmts, so we defer their processing so as to
     make sure we will have debug info for as many variables as
     possible (all of those that were dealt with in the loop above),
     and discard those for which we know there's nothing we can
     do.  */
  if (has_debug_stmt)
    FOR_EACH_VEC_ELT (basic_block, body, i, bb)
      if (bb != entry_bb && bb != exit_bb)
	{
	  for (gsi = gsi_start_bb (bb); !gsi_end_p (gsi);)
	    {
	      gimple stmt = gsi_stmt (gsi);

	      if (is_gimple_debug (stmt))
		{
		  if (separate_decls_in_region_debug (stmt, name_copies,
						      decl_copies))
		    {
		      /* The debug stmt could not be remapped; remove
			 it rather than keep a dangling reference.  */
		      gsi_remove (&gsi, true);
		      continue;
		    }
		}

	      gsi_next (&gsi);
	    }
	}

  VEC_free (basic_block, heap, body);

  if (htab_elements (name_copies) == 0 && htab_elements (reduction_list) == 0)
    {
      /* It may happen that there is nothing to copy (if there are only
	 loop carried and external variables in the loop).  */
      *arg_struct = NULL;
      *new_arg_struct = NULL;
    }
  else
    {
      /* Create the type for the structure to store the ssa names to.  */
      type = lang_hooks.types.make_type (RECORD_TYPE);
      type_name = build_decl (UNKNOWN_LOCATION,
			      TYPE_DECL, create_tmp_var_name (".paral_data"),
			      type);
      TYPE_NAME (type) = type_name;

      /* One field per copied name, plus one per reduction.  */
      htab_traverse (name_copies, add_field_for_name, type);
      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  /* Create the fields for reductions.  */
	  htab_traverse (reduction_list, add_field_for_reduction,
			 type);
	}
      layout_type (type);

      /* Create the loads and stores.  */
      *arg_struct = create_tmp_var (type, ".paral_data_store");
      add_referenced_var (*arg_struct);
      nvar = create_tmp_var (build_pointer_type (type), ".paral_data_load");
      add_referenced_var (nvar);
      *new_arg_struct = make_ssa_name (nvar, NULL);

      ld_st_data->store = *arg_struct;
      ld_st_data->load = *new_arg_struct;
      ld_st_data->store_bb = bb0;
      ld_st_data->load_bb = bb1;

      htab_traverse (name_copies, create_loads_and_stores_for_name,
		     ld_st_data);

      /* Load the calculation from memory (after the join of the threads).  */

      if (reduction_list && htab_elements (reduction_list) > 0)
	{
	  htab_traverse (reduction_list, create_stores_for_reduction,
			 ld_st_data);
	  clsn_data.load = make_ssa_name (nvar, NULL);
	  clsn_data.load_bb = exit->dest;
	  clsn_data.store = ld_st_data->store;
	  create_final_loads_for_reduction (reduction_list, &clsn_data);
	}
    }

  htab_delete (decl_copies);
  htab_delete (name_copies);
}
1390
/* Bitmap containing uids of functions created by parallelization.  We cannot
   allocate it from the default obstack, as it must live across compilation
   of several functions; we make it gc allocated instead.  Lazily created
   by create_loop_fn and queried by parallelized_function_p.  */

static GTY(()) bitmap parallelized_functions;
1396
1397/* Returns true if FN was created by create_loop_fn. */
1398
62e0a1ed 1399bool
5f40b3cb
ZD
1400parallelized_function_p (tree fn)
1401{
1402 if (!parallelized_functions || !DECL_ARTIFICIAL (fn))
1403 return false;
1404
1405 return bitmap_bit_p (parallelized_functions, DECL_UID (fn));
1406}
1407
1408/* Creates and returns an empty function that will receive the body of
1409 a parallelized loop. */
1410
1411static tree
9ff70652 1412create_loop_fn (location_t loc)
5f40b3cb
ZD
1413{
1414 char buf[100];
1415 char *tname;
1416 tree decl, type, name, t;
1417 struct function *act_cfun = cfun;
1418 static unsigned loopfn_num;
1419
1420 snprintf (buf, 100, "%s.$loopfn", current_function_name ());
1421 ASM_FORMAT_PRIVATE_NAME (tname, buf, loopfn_num++);
1422 clean_symbol_name (tname);
1423 name = get_identifier (tname);
1424 type = build_function_type_list (void_type_node, ptr_type_node, NULL_TREE);
1425
9ff70652 1426 decl = build_decl (loc, FUNCTION_DECL, name, type);
5f40b3cb
ZD
1427 if (!parallelized_functions)
1428 parallelized_functions = BITMAP_GGC_ALLOC ();
1429 bitmap_set_bit (parallelized_functions, DECL_UID (decl));
1430
1431 TREE_STATIC (decl) = 1;
1432 TREE_USED (decl) = 1;
1433 DECL_ARTIFICIAL (decl) = 1;
1434 DECL_IGNORED_P (decl) = 0;
1435 TREE_PUBLIC (decl) = 0;
1436 DECL_UNINLINABLE (decl) = 1;
1437 DECL_EXTERNAL (decl) = 0;
1438 DECL_CONTEXT (decl) = NULL_TREE;
1439 DECL_INITIAL (decl) = make_node (BLOCK);
1440
9ff70652 1441 t = build_decl (loc, RESULT_DECL, NULL_TREE, void_type_node);
5f40b3cb
ZD
1442 DECL_ARTIFICIAL (t) = 1;
1443 DECL_IGNORED_P (t) = 1;
1444 DECL_RESULT (decl) = t;
1445
9ff70652 1446 t = build_decl (loc, PARM_DECL, get_identifier (".paral_data_param"),
5f40b3cb
ZD
1447 ptr_type_node);
1448 DECL_ARTIFICIAL (t) = 1;
1449 DECL_ARG_TYPE (t) = ptr_type_node;
1450 DECL_CONTEXT (t) = decl;
1451 TREE_USED (t) = 1;
1452 DECL_ARGUMENTS (decl) = t;
1453
182e0d71 1454 allocate_struct_function (decl, false);
5f40b3cb
ZD
1455
1456 /* The call to allocate_struct_function clobbers CFUN, so we need to restore
1457 it. */
5576d6f2 1458 set_cfun (act_cfun);
5f40b3cb
ZD
1459
1460 return decl;
1461}
1462
5f40b3cb
ZD
/* Moves the exit condition of LOOP to the beginning of its header, and
   duplicates the part of the last iteration that gets disabled to the
   exit of the loop.  NIT is the number of iterations of the loop
   (used to initialize the variables in the duplicated part).

   TODO: the common case is that latch of the loop is empty and immediately
   follows the loop exit.  In this case, it would be better not to copy the
   body of the loop, but only move the entry of the loop directly before the
   exit check and increase the number of iterations of the loop by one.
   This may need some additional preconditioning in case NIT = ~0.
   REDUCTION_LIST describes the reductions in LOOP.  */

static void
transform_to_exit_first_loop (struct loop *loop, htab_t reduction_list, tree nit)
{
  basic_block *bbs, *nbbs, ex_bb, orig_header;
  unsigned n;
  bool ok;
  edge exit = single_dom_exit (loop), hpred;
  tree control, control_name, res, t;
  gimple phi, nphi, cond_stmt, stmt, cond_nit;
  gimple_stmt_iterator gsi;
  tree nit_1;

  split_block_after_labels (loop->header);
  orig_header = single_succ (loop->header);
  hpred = single_succ_edge (loop->header);

  /* The exit condition is expected to compare the control variable
     against NIT.  */
  cond_stmt = last_stmt (exit->src);
  control = gimple_cond_lhs (cond_stmt);
  gcc_assert (gimple_cond_rhs (cond_stmt) == nit);

  /* Make sure that we have phi nodes on exit for all loop header phis
     (create_parallel_loop requires that).  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      /* Give the header phi a fresh name and re-create the original
	 result in ORIG_HEADER, fed by the fresh name.  */
      t = make_ssa_name (SSA_NAME_VAR (res), phi);
      SET_PHI_RESULT (phi, t);
      nphi = create_phi_node (res, orig_header);
      SSA_NAME_DEF_STMT (res) = nphi;
      add_phi_arg (nphi, t, hpred, UNKNOWN_LOCATION, NULL);

      if (res == control)
	{
	  /* Keep the exit condition referring to the renamed control
	     variable.  */
	  gimple_cond_set_lhs (cond_stmt, t);
	  update_stmt (cond_stmt);
	  control = t;
	}
    }

  bbs = get_loop_body_in_dom_order (loop);

  /* Count the blocks dominating the exit source.  */
  for (n = 0; bbs[n] != exit->src; n++)
   continue;
  nbbs = XNEWVEC (basic_block, n);
  ok = gimple_duplicate_sese_tail (single_succ_edge (loop->header), exit,
				   bbs + 1, n, nbbs);
  gcc_assert (ok);
  free (bbs);
  ex_bb = nbbs[0];
  free (nbbs);

  /* Other than reductions, the only gimple reg that should be copied
     out of the loop is the control variable.  */
  exit = single_dom_exit (loop);
  control_name = NULL_TREE;
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); )
    {
      phi = gsi_stmt (gsi);
      res = PHI_RESULT (phi);
      if (!is_gimple_reg (res))
	{
	  gsi_next (&gsi);
	  continue;
	}

      /* Check if it is a part of reduction.  If it is,
	 keep the phi at the reduction's keep_res field.  The
	 PHI_RESULT of this phi is the resulting value of the reduction
	 variable when exiting the loop.  */

      if (htab_elements (reduction_list) > 0)
	{
	  struct reduction_info *red;

	  tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);
	  red = reduction_phi (reduction_list, SSA_NAME_DEF_STMT (val));
	  if (red)
	    {
	      red->keep_res = phi;
	      gsi_next (&gsi);
	      continue;
	    }
	}
      /* Anything else must be the control variable's exit phi.  */
      gcc_assert (control_name == NULL_TREE
		  && SSA_NAME_VAR (res) == SSA_NAME_VAR (control));
      control_name = res;
      remove_phi_node (&gsi, false);
    }
  gcc_assert (control_name != NULL_TREE);

  /* Initialize the control variable to number of iterations
     according to the rhs of the exit condition.  */
  gsi = gsi_after_labels (ex_bb);
  cond_nit = last_stmt (exit->src);
  nit_1 = gimple_cond_rhs (cond_nit);
  nit_1 = force_gimple_operand_gsi (&gsi,
				    fold_convert (TREE_TYPE (control_name), nit_1),
				    false, NULL_TREE, false, GSI_SAME_STMT);
  stmt = gimple_build_assign (control_name, nit_1);
  gsi_insert_before (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (control_name) = stmt;
}
1578
/* Create the parallel constructs for LOOP as described in gen_parallel_loop.
   LOOP_FN and DATA are the arguments of GIMPLE_OMP_PARALLEL.
   NEW_DATA is the variable that should be initialized from the argument
   of LOOP_FN.  N_THREADS is the requested number of threads.  LOC is
   the location to attach to the emitted OMP statements.  Returns the
   basic block containing GIMPLE_OMP_PARALLEL tree.  */

static basic_block
create_parallel_loop (struct loop *loop, tree loop_fn, tree data,
		      tree new_data, unsigned n_threads, location_t loc)
{
  gimple_stmt_iterator gsi;
  basic_block bb, paral_bb, for_bb, ex_bb;
  tree t, param;
  gimple stmt, for_stmt, phi, cond_stmt;
  tree cvar, cvar_init, initvar, cvar_next, cvar_base, type;
  edge exit, nexit, guard, end, e;

  /* Prepare the GIMPLE_OMP_PARALLEL statement.  */
  bb = loop_preheader_edge (loop)->src;
  paral_bb = single_pred (bb);
  gsi = gsi_last_bb (paral_bb);

  /* Attach a num_threads(N_THREADS) clause.  */
  t = build_omp_clause (loc, OMP_CLAUSE_NUM_THREADS);
  OMP_CLAUSE_NUM_THREADS_EXPR (t)
    = build_int_cst (integer_type_node, n_threads);
  stmt = gimple_build_omp_parallel (NULL, t, loop_fn, data);
  gimple_set_location (stmt, loc);

  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Initialize NEW_DATA.  */
  if (data)
    {
      gsi = gsi_after_labels (bb);

      /* NEW_DATA = (type of NEW_DATA) &DATA, going through the
	 function's `void *' parameter.  */
      param = make_ssa_name (DECL_ARGUMENTS (loop_fn), NULL);
      stmt = gimple_build_assign (param, build_fold_addr_expr (data));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (param) = stmt;

      stmt = gimple_build_assign (new_data,
				  fold_convert (TREE_TYPE (new_data), param));
      gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
      SSA_NAME_DEF_STMT (new_data) = stmt;
    }

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_PARALLEL.  */
  bb = split_loop_exit_edge (single_dom_exit (loop));
  gsi = gsi_last_bb (bb);
  stmt = gimple_build_omp_return (false);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* Extract data for GIMPLE_OMP_FOR.  */
  gcc_assert (loop->header == single_dom_exit (loop)->src);
  cond_stmt = last_stmt (loop->header);

  cvar = gimple_cond_lhs (cond_stmt);
  cvar_base = SSA_NAME_VAR (cvar);
  phi = SSA_NAME_DEF_STMT (cvar);
  cvar_init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
  /* INITVAR becomes the OMP_FOR index; redirect the header phi's
     preheader argument to it.  */
  initvar = make_ssa_name (cvar_base, NULL);
  SET_USE (PHI_ARG_DEF_PTR_FROM_EDGE (phi, loop_preheader_edge (loop)),
	   initvar);
  cvar_next = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));

  /* The increment of the control variable is replaced by the
     GIMPLE_OMP_CONTINUE emitted below.  */
  gsi = gsi_last_nondebug_bb (loop->latch);
  gcc_assert (gsi_stmt (gsi) == SSA_NAME_DEF_STMT (cvar_next));
  gsi_remove (&gsi, true);

  /* Prepare cfg.  */
  for_bb = split_edge (loop_preheader_edge (loop));
  ex_bb = split_loop_exit_edge (single_dom_exit (loop));
  extract_true_false_edges_from_block (loop->header, &nexit, &exit);
  gcc_assert (exit == single_dom_exit (loop));

  /* GUARD skips the loop entirely; END leaves it from the latch.  */
  guard = make_edge (for_bb, ex_bb, 0);
  single_succ_edge (loop->latch)->flags = 0;
  end = make_edge (loop->latch, ex_bb, EDGE_FALLTHRU);
  for (gsi = gsi_start_phis (ex_bb); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      source_location locus;
      tree block;
      tree def;
      phi = gsi_stmt (gsi);
      stmt = SSA_NAME_DEF_STMT (PHI_ARG_DEF_FROM_EDGE (phi, exit));

      /* Extend the exit phis with arguments for the new incoming
	 edges, copied from the corresponding header phi.  */
      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_preheader_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt,
						 loop_preheader_edge (loop));
      block = gimple_phi_arg_block_from_edge (stmt,
					      loop_preheader_edge (loop));
      add_phi_arg (phi, def, guard, locus, block);

      def = PHI_ARG_DEF_FROM_EDGE (stmt, loop_latch_edge (loop));
      locus = gimple_phi_arg_location_from_edge (stmt, loop_latch_edge (loop));
      block = gimple_phi_arg_block_from_edge (stmt, loop_latch_edge (loop));
      add_phi_arg (phi, def, end, locus, block);
    }
  e = redirect_edge_and_branch (exit, nexit->dest);
  PENDING_STMT (e) = NULL;

  /* Emit GIMPLE_OMP_FOR.  */
  gimple_cond_set_lhs (cond_stmt, cvar_base);
  type = TREE_TYPE (cvar);
  t = build_omp_clause (loc, OMP_CLAUSE_SCHEDULE);
  OMP_CLAUSE_SCHEDULE_KIND (t) = OMP_CLAUSE_SCHEDULE_STATIC;

  for_stmt = gimple_build_omp_for (NULL, t, 1, NULL);
  gimple_set_location (for_stmt, loc);
  gimple_omp_for_set_index (for_stmt, 0, initvar);
  gimple_omp_for_set_initial (for_stmt, 0, cvar_init);
  gimple_omp_for_set_final (for_stmt, 0, gimple_cond_rhs (cond_stmt));
  gimple_omp_for_set_cond (for_stmt, 0, gimple_cond_code (cond_stmt));
  gimple_omp_for_set_incr (for_stmt, 0, build2 (PLUS_EXPR, type,
						cvar_base,
						build_int_cst (type, 1)));

  gsi = gsi_last_bb (for_bb);
  gsi_insert_after (&gsi, for_stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (initvar) = for_stmt;

  /* Emit GIMPLE_OMP_CONTINUE.  */
  gsi = gsi_last_bb (loop->latch);
  stmt = gimple_build_omp_continue (cvar_next, cvar);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
  SSA_NAME_DEF_STMT (cvar_next) = stmt;

  /* Emit GIMPLE_OMP_RETURN for GIMPLE_OMP_FOR.  */
  gsi = gsi_last_bb (ex_bb);
  stmt = gimple_build_omp_return (true);
  gimple_set_location (stmt, loc);
  gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);

  /* After the above dom info is hosed.  Re-compute it.  */
  free_dominance_info (CDI_DOMINATORS);
  calculate_dominance_info (CDI_DOMINATORS);

  return paral_bb;
}
1720
/* Generates code to execute the iterations of LOOP in N_THREADS
   threads in parallel.

   NITER describes number of iterations of LOOP.
   REDUCTION_LIST describes the reductions existent in the LOOP.  */

static void
gen_parallel_loop (struct loop *loop, htab_t reduction_list,
		   unsigned n_threads, struct tree_niter_desc *niter)
{
  loop_iterator li;
  tree many_iterations_cond, type, nit;
  tree arg_struct, new_arg_struct;
  gimple_seq stmts;
  basic_block parallel_head;
  edge entry, exit;
  struct clsn_data clsn_data;
  unsigned prob;
  location_t loc;
  gimple cond_stmt;
  unsigned int m_p_thread=2;

  /* From

     ---------------------------------------------------------------------
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }
     ---------------------------------------------------------------------

     with # of iterations NITER (possibly with MAY_BE_ZERO assumption),
     we generate the following code:

     ---------------------------------------------------------------------

     if (MAY_BE_ZERO
	 || NITER < MIN_PER_THREAD * N_THREADS)
       goto original;

     BODY1;
     store all local loop-invariant variables used in body of the loop to DATA.
     GIMPLE_OMP_PARALLEL (OMP_CLAUSE_NUM_THREADS (N_THREADS), LOOPFN, DATA);
     load the variables from DATA.
     GIMPLE_OMP_FOR (IV = INIT; COND; IV += STEP) (OMP_CLAUSE_SCHEDULE (static))
     BODY2;
     BODY1;
     GIMPLE_OMP_CONTINUE;
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_FOR
     GIMPLE_OMP_RETURN         -- GIMPLE_OMP_PARALLEL
     goto end;

     original:
     loop
       {
	 IV = phi (INIT, IV + STEP)
	 BODY1;
	 if (COND)
	   break;
	 BODY2;
       }

     end:

   */

  /* Create two versions of the loop -- in the old one, we know that the
     number of iterations is large enough, and we will transform it into the
     loop that will be split to loop_fn, the new one will be used for the
     remaining iterations.  */

  /* We should compute a better number-of-iterations value for outer loops.
     That is, if we have

     for (i = 0; i < n; ++i)
       for (j = 0; j < m; ++j)
	 ...

     we should compute nit = n * m, not nit = n.
     Also may_be_zero handling would need to be adjusted.  */

  /* Materialize the iteration count as a gimple value on the preheader
     edge so the versioning condition below can use it.  */
  type = TREE_TYPE (niter->niter);
  nit = force_gimple_operand (unshare_expr (niter->niter), &stmts, true,
			      NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);

  /* For loops with an inner loop the per-outer-iteration work is larger,
     so accept a smaller minimum trip count per thread.  */
  if (loop->inner)
    m_p_thread=2;
  else
    m_p_thread=MIN_PER_THREAD;

  /* Condition under which the parallel version is entered:
     NITER >= m_p_thread * n_threads, and NITER is known nonzero.  */
  many_iterations_cond =
    fold_build2 (GE_EXPR, boolean_type_node,
		 nit, build_int_cst (type, m_p_thread * n_threads));

  many_iterations_cond
    = fold_build2 (TRUTH_AND_EXPR, boolean_type_node,
		   invert_truthvalue (unshare_expr (niter->may_be_zero)),
		   many_iterations_cond);
  many_iterations_cond
    = force_gimple_operand (many_iterations_cond, &stmts, false, NULL_TREE);
  if (stmts)
    gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
  if (!is_gimple_condexpr (many_iterations_cond))
    {
      /* Second gimplification pass with simple_p = true to reduce the
	 condition to something a GIMPLE_COND can hold.  */
      many_iterations_cond
	= force_gimple_operand (many_iterations_cond, &stmts,
				true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (loop_preheader_edge (loop), stmts);
    }

  initialize_original_copy_tables ();

  /* We assume that the loop usually iterates a lot.  */
  prob = 4 * REG_BR_PROB_BASE / 5;
  loop_version (loop, many_iterations_cond, NULL,
		prob, prob, REG_BR_PROB_BASE - prob, true);
  update_ssa (TODO_update_ssa);
  free_original_copy_tables ();

  /* Base all the induction variables in LOOP on a single control one.  */
  canonicalize_loop_ivs (loop, &nit, true);

  /* Ensure that the exit condition is the first statement in the loop.
     This shape is required by the OMP-for code generated below.  */
  transform_to_exit_first_loop (loop, reduction_list, nit);

  /* Generate initializations for reductions.  */
  if (htab_elements (reduction_list) > 0)
    htab_traverse (reduction_list, initialize_reductions, loop);

  /* Eliminate the references to local variables from the loop.  */
  gcc_assert (single_exit (loop));
  entry = loop_preheader_edge (loop);
  exit = single_dom_exit (loop);

  eliminate_local_variables (entry, exit);
  /* In the old loop, move all variables non-local to the loop to a structure
     and back, and create separate decls for the variables used in loop.  */
  separate_decls_in_region (entry, exit, reduction_list, &arg_struct,
			    &new_arg_struct, &clsn_data);

  /* Create the parallel constructs.  Use the location of the header's
     exit condition for the generated OMP statements when available.  */
  loc = UNKNOWN_LOCATION;
  cond_stmt = last_stmt (loop->header);
  if (cond_stmt)
    loc = gimple_location (cond_stmt);
  parallel_head = create_parallel_loop (loop, create_loop_fn (loc), arg_struct,
					new_arg_struct, n_threads, loc);
  if (htab_elements (reduction_list) > 0)
    create_call_for_reduction (loop, reduction_list, &clsn_data);

  /* The transformation invalidated scalar evolution data.  */
  scev_reset ();

  /* Cancel the loop (it is simpler to do it here rather than to teach the
     expander to do it).  */
  cancel_loop_tree (loop);

  /* Free loop bound estimations that could contain references to
     removed statements.  */
  FOR_EACH_LOOP (li, loop, 0)
    free_numbers_of_iterations_estimates_loop (loop);

  /* Expand the parallel constructs.  We do it directly here instead of running
     a separate expand_omp pass, since it is more efficient, and less likely to
     cause troubles with further analyses not being able to deal with the
     OMP trees.  */

  omp_expand_local (parallel_head);
}
1896
9857228c
SP
1897/* Returns true when LOOP contains vector phi nodes. */
1898
1899static bool
726a989a 1900loop_has_vector_phi_nodes (struct loop *loop ATTRIBUTE_UNUSED)
9857228c
SP
1901{
1902 unsigned i;
1903 basic_block *bbs = get_loop_body_in_dom_order (loop);
726a989a 1904 gimple_stmt_iterator gsi;
9857228c 1905 bool res = true;
9857228c
SP
1906
1907 for (i = 0; i < loop->num_nodes; i++)
726a989a
RB
1908 for (gsi = gsi_start_phis (bbs[i]); !gsi_end_p (gsi); gsi_next (&gsi))
1909 if (TREE_CODE (TREE_TYPE (PHI_RESULT (gsi_stmt (gsi)))) == VECTOR_TYPE)
9857228c
SP
1910 goto end;
1911
1912 res = false;
1913 end:
1914 free (bbs);
1915 return res;
1916}
1917
08dab97a
RL
1918/* Create a reduction_info struct, initialize it with REDUC_STMT
1919 and PHI, insert it to the REDUCTION_LIST. */
1920
1921static void
1922build_new_reduction (htab_t reduction_list, gimple reduc_stmt, gimple phi)
1923{
1924 PTR *slot;
1925 struct reduction_info *new_reduction;
1926
1927 gcc_assert (reduc_stmt);
b8698a0f 1928
08dab97a
RL
1929 if (dump_file && (dump_flags & TDF_DETAILS))
1930 {
1931 fprintf (dump_file,
1932 "Detected reduction. reduction stmt is: \n");
1933 print_gimple_stmt (dump_file, reduc_stmt, 0, 0);
1934 fprintf (dump_file, "\n");
1935 }
b8698a0f 1936
08dab97a 1937 new_reduction = XCNEW (struct reduction_info);
b8698a0f 1938
08dab97a
RL
1939 new_reduction->reduc_stmt = reduc_stmt;
1940 new_reduction->reduc_phi = phi;
5d1fd1de 1941 new_reduction->reduc_version = SSA_NAME_VERSION (gimple_phi_result (phi));
08dab97a
RL
1942 new_reduction->reduction_code = gimple_assign_rhs_code (reduc_stmt);
1943 slot = htab_find_slot (reduction_list, new_reduction, INSERT);
1944 *slot = new_reduction;
1945}
1946
5d1fd1de
JJ
1947/* Callback for htab_traverse. Sets gimple_uid of reduc_phi stmts. */
1948
1949static int
1950set_reduc_phi_uids (void **slot, void *data ATTRIBUTE_UNUSED)
1951{
1952 struct reduction_info *const red = (struct reduction_info *) *slot;
1953 gimple_set_uid (red->reduc_phi, red->reduc_version);
1954 return 1;
1955}
1956
/* Detect all reductions in the LOOP, insert them into REDUCTION_LIST.  */

static void
gather_scalar_reductions (loop_p loop, htab_t reduction_list)
{
  gimple_stmt_iterator gsi;
  loop_vec_info simple_loop_info;

  /* NOTE(review): presumably silences the vectorizer's dump output while
     we borrow its reduction analysis -- confirm against vect_dump users.  */
  vect_dump = NULL;
  /* May be NULL when LOOP does not have the form the vectorizer expects;
     the loop below only uses it when non-NULL.  */
  simple_loop_info = vect_analyze_loop_form (loop);

  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      affine_iv iv;
      tree res = PHI_RESULT (phi);
      bool double_reduc;

      /* Only scalar SSA registers can participate in a reduction.  */
      if (!is_gimple_reg (res))
	continue;

      /* A header PHI that is not a simple induction variable is a
	 reduction candidate.  */
      if (!simple_iv (loop, loop, res, &iv, true)
	  && simple_loop_info)
	{
	  gimple reduc_stmt = vect_force_simple_reduction (simple_loop_info,
							   phi, true,
							   &double_reduc);
	  /* Nested (double) reductions are not handled here.  */
	  if (reduc_stmt && !double_reduc)
	    build_new_reduction (reduction_list, reduc_stmt, phi);
	}
    }
  destroy_loop_vec_info (simple_loop_info, true);

  /* As gimple_uid is used by the vectorizer in between vect_analyze_loop_form
     and destroy_loop_vec_info, we can set gimple_uid of reduc_phi stmts
     only now.  */
  htab_traverse (reduction_list, set_reduc_phi_uids, NULL);
}
1995
1996/* Try to initialize NITER for code generation part. */
1997
1998static bool
1999try_get_loop_niter (loop_p loop, struct tree_niter_desc *niter)
2000{
2001 edge exit = single_dom_exit (loop);
2002
2003 gcc_assert (exit);
2004
2005 /* We need to know # of iterations, and there should be no uses of values
2006 defined inside loop outside of it, unless the values are invariants of
2007 the loop. */
2008 if (!number_of_iterations_exit (loop, exit, niter, false))
2009 {
2010 if (dump_file && (dump_flags & TDF_DETAILS))
2011 fprintf (dump_file, " FAILED: number of iterations not known\n");
2012 return false;
2013 }
2014
2015 return true;
2016}
2017
/* Try to initialize REDUCTION_LIST for code generation part.
   REDUCTION_LIST describes the reductions.

   Returns false when a value defined inside the loop is used after it
   without being part of a recognized reduction, or when a header PHI is
   neither a simple induction variable nor a reduction -- i.e. when a
   scalar dependency between iterations remains.  */

static bool
try_create_reduction_list (loop_p loop, htab_t reduction_list)
{
  edge exit = single_dom_exit (loop);
  gimple_stmt_iterator gsi;

  gcc_assert (exit);

  gather_scalar_reductions (loop, reduction_list);

  /* Every scalar value live after the loop must be the result of one of
     the reductions collected above.  */
  for (gsi = gsi_start_phis (exit->dest); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      struct reduction_info *red;
      imm_use_iterator imm_iter;
      use_operand_p use_p;
      gimple reduc_phi;
      tree val = PHI_ARG_DEF_FROM_EDGE (phi, exit);

      if (is_gimple_reg (val))
	{
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "phi is ");
	      print_gimple_stmt (dump_file, phi, 0, 0);
	      fprintf (dump_file, "arg of phi to exit: value ");
	      print_generic_expr (dump_file, val, 0);
	      fprintf (dump_file, " used outside loop\n");
	      fprintf (dump_file,
		       " checking if it a part of reduction pattern: \n");
	    }
	  /* No reductions were detected at all, so VAL cannot be one.  */
	  if (htab_elements (reduction_list) == 0)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  /* Find the (non-debug) use of VAL inside the loop; for a
	     reduction this is the reduction PHI in the loop header.  */
	  reduc_phi = NULL;
	  FOR_EACH_IMM_USE_FAST (use_p, imm_iter, val)
	    {
	      if (!gimple_debug_bind_p (USE_STMT (use_p))
		  && flow_bb_inside_loop_p (loop, gimple_bb (USE_STMT (use_p))))
		{
		  reduc_phi = USE_STMT (use_p);
		  break;
		}
	    }
	  red = reduction_phi (reduction_list, reduc_phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 " FAILED: it is not a part of reduction.\n");
	      return false;
	    }
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "reduction phi is ");
	      print_gimple_stmt (dump_file, red->reduc_phi, 0, 0);
	      fprintf (dump_file, "reduction stmt is ");
	      print_gimple_stmt (dump_file, red->reduc_stmt, 0, 0);
	    }
	}
    }

  /* The iterations of the loop may communicate only through bivs whose
     iteration space can be distributed efficiently.  */
  for (gsi = gsi_start_phis (loop->header); !gsi_end_p (gsi); gsi_next (&gsi))
    {
      gimple phi = gsi_stmt (gsi);
      tree def = PHI_RESULT (phi);
      affine_iv iv;

      if (is_gimple_reg (def) && !simple_iv (loop, loop, def, &iv, true))
	{
	  struct reduction_info *red;

	  red = reduction_phi (reduction_list, phi);
	  if (red == NULL)
	    {
	      if (dump_file && (dump_flags & TDF_DETAILS))
		fprintf (dump_file,
			 "  FAILED: scalar dependency between iterations\n");
	      return false;
	    }
	}
    }

  return true;
}
2114
/* Detect parallel loops and generate parallel code using libgomp
   primitives.  Returns true if some loop was parallelized, false
   otherwise.  */

bool
parallelize_loops (void)
{
  unsigned n_threads = flag_tree_parallelize_loops;
  bool changed = false;
  struct loop *loop;
  struct tree_niter_desc niter_desc;
  loop_iterator li;
  htab_t reduction_list;
  struct obstack parloop_obstack;
  HOST_WIDE_INT estimated;
  LOC loop_loc;

  /* Do not parallelize loops in the functions created by parallelization.  */
  if (parallelized_function_p (cfun->decl))
    return false;
  /* Non-local labels make duplicating/outlining the loop body unsafe.  */
  if (cfun->has_nonlocal_label)
    return false;

  gcc_obstack_init (&parloop_obstack);
  /* 'free' is the delete function, so entries are released by
     htab_empty/htab_delete.  */
  reduction_list = htab_create (10, reduction_info_hash,
				reduction_info_eq, free);
  init_stmt_vec_info_vec ();

  FOR_EACH_LOOP (li, loop, 0)
    {
      /* The reduction list is per-loop; reset it for each candidate.  */
      htab_empty (reduction_list);
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
        fprintf (dump_file, "Trying loop %d as candidate\n",loop->num);
	if (loop->inner)
	  fprintf (dump_file, "loop %d is not innermost\n",loop->num);
	else
	  fprintf (dump_file, "loop %d is innermost\n",loop->num);
      }

      /* If we use autopar in graphite pass, we use its marked dependency
	 checking results.  */
      if (flag_loop_parallelize_all && !loop->can_be_parallel)
      {
        if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "loop is not parallel according to graphite\n");
	continue;
      }

      /* Code generation requires a single exit dominating the latch.  */
      if (!single_dom_exit (loop))
      {

        if (dump_file && (dump_flags & TDF_DETAILS))
	  fprintf (dump_file, "loop is !single_dom_exit\n");

	continue;
      }

      if (/* And of course, the loop must be parallelizable.  */
	  !can_duplicate_loop_p (loop)
	  || loop_has_blocks_with_irreducible_flag (loop)
	  || (loop_preheader_edge (loop)->src->flags & BB_IRREDUCIBLE_LOOP)
	  /* FIXME: the check for vector phi nodes could be removed.  */
	  || loop_has_vector_phi_nodes (loop))
	continue;

      /* Profitability: skip loops whose estimated trip count is too small
	 to pay for the thread spawn overhead.  -1 means "unknown".  */
      estimated = estimated_stmt_executions_int (loop);
      if (estimated == -1)
	estimated = max_stmt_executions_int (loop);
      /* FIXME: Bypass this check as graphite doesn't update the
	 count and frequency correctly now.  */
      if (!flag_loop_parallelize_all
	  && ((estimated != -1
	       && estimated <= (HOST_WIDE_INT) n_threads * MIN_PER_THREAD)
	      /* Do not bother with loops in cold areas.  */
	      || optimize_loop_nest_for_size_p (loop)))
	continue;

      if (!try_get_loop_niter (loop, &niter_desc))
	continue;

      if (!try_create_reduction_list (loop, reduction_list))
	continue;

      /* With graphite (-floop-parallelize-all) the dependency analysis
	 was already done; otherwise run our own check.  */
      if (!flag_loop_parallelize_all
	  && !loop_parallel_p (loop, &parloop_obstack))
	continue;

      changed = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
      {
	if (loop->inner)
	  fprintf (dump_file, "parallelizing outer loop %d\n",loop->header->index);
	else
	  fprintf (dump_file, "parallelizing inner loop %d\n",loop->header->index);
	loop_loc = find_loop_location (loop);
	if (loop_loc != UNKNOWN_LOC)
	  fprintf (dump_file, "\nloop at %s:%d: ",
		   LOC_FILE (loop_loc), LOC_LINE (loop_loc));
      }
      gen_parallel_loop (loop, reduction_list,
			 n_threads, &niter_desc);
#ifdef ENABLE_CHECKING
      verify_flow_info ();
      verify_loop_structure ();
      verify_loop_closed_ssa (true);
#endif
    }

  free_stmt_vec_info_vec ();
  htab_delete (reduction_list);
  obstack_free (&parloop_obstack, NULL);

  /* Parallelization will cause new function calls to be inserted through
     which local variables will escape.  Reset the points-to solution
     for ESCAPED.  */
  if (changed)
    pt_solution_reset (&cfun->gimple_df->escaped);

  return changed;
}
2236
2237#include "gt-tree-parloops.h"